[med-svn] [beagle] 01/06: Imported Upstream version 4.1~160616-7e4+dfsg

Dylan Aïssi bob.dybian-guest at moszumanska.debian.org
Sat Jun 25 13:05:43 UTC 2016


This is an automated email from the git hooks/post-receive script.

bob.dybian-guest pushed a commit to branch master
in repository beagle.

commit 5e95ccce1c9beceae9620d41a43cd60c9208b632
Author: Dylan Aïssi <bob.dybian at gmail.com>
Date:   Sat Jun 25 00:19:38 2016 +0200

    Imported Upstream version 4.1~160616-7e4+dfsg
---
 blbutil/BGZIPOutputStream.java | 237 ++++++++++++++++++++++++
 blbutil/FileUtil.java          | 161 +++++++++-------
 blbutil/InputIt.java           |  11 +-
 main/CurrentData.java          | 100 +++++-----
 main/Main.java                 |  60 ++++--
 main/MainHelper.java           |   4 +-
 main/WindowWriter.java         | 227 ++++++++++++++++-------
 sample/HaplotypeCoder.java     |  15 +-
 sample/ImputationData.java     |  99 +++++-----
 sample/LSHapBaum.java          |  54 +++---
 sample/RefHapSegs.java         | 108 ++++-------
 vcf/AllData.java               |   4 +-
 vcf/Data.java                  |  18 +-
 vcf/R2Estimator.java           | 153 ++++++++++++++++
 vcf/VcfRecBuilder.java         | 406 +++++++++++++++++++++++++++++++++++++++++
 vcf/VcfWindow.java             | 107 ++---------
 vcf/VcfWriter.java             | 319 ++++++++++----------------------
 17 files changed, 1402 insertions(+), 681 deletions(-)

diff --git a/blbutil/BGZIPOutputStream.java b/blbutil/BGZIPOutputStream.java
new file mode 100644
index 0000000..7c80c91
--- /dev/null
+++ b/blbutil/BGZIPOutputStream.java
@@ -0,0 +1,237 @@
+/*
+ * Copyright (C) 2014 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+package blbutil;
+
+import java.io.BufferedOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.zip.CRC32;
+import java.util.zip.Deflater;
+
+/**
+ * <p>Class {@code BGZIPOutputStream} is an output stream filter that performs
+ * BGZIP compression.
+ * </p>
+ * <p>The GZIP file format specification is described
+ * <a href="https://www.ietf.org/rfc/rfc1952.txt">RFC 1952</a>
+ * and the BGZIP file format specification is described in the
+ * <a href="https://samtools.github.io/hts-specs/SAMv1.pdf">
+ * Sequence Alignment/Map Format Specification</a>
+ * </p>
+ * <p>Instances of class {@code BGZIPOutputStream} are not thread safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class BGZIPOutputStream extends OutputStream {
+
+    // overhead bytes: 26 w/compression, 31 w/o compression
+    private static final boolean USE_GZIP = true;
+    private static final int NOCOMPRESS_XTRA_BYTES = 5;
+    private static final int MAX_INPUT_BYTES = (1 << 16) - 31;
+    private static final int MAX_OUTPUT_BYTES =
+            (MAX_INPUT_BYTES + NOCOMPRESS_XTRA_BYTES);
+
+    private final boolean writeEmptyBlock;
+    private final Deflater gzipDef;
+    private final CRC32 crc;
+    private final OutputStream os;
+
+    private final byte[] input = new byte[MAX_INPUT_BYTES];
+    private final byte[] output = new byte[MAX_OUTPUT_BYTES + 1];
+
+    private int iSize = 0;
+
+    /**
+     * Applies BGZIP compression on the specified files. The filename of
+     * each compressed file will be the original filename followed by ".gz".
+     * The original files are not deleted or overwritten.  The program
+     * exits with an error message if any input filename ends with ".gz".
+     *
+     * @param args a list of files that will be compressed
+     * @throws IOException if an I/O error occurs
+     */
+    public static void main(String[] args) throws IOException {
+        if (args.length == 0) {
+            Utilities.exit("usage: java -ea -jar bgzip.jar [file1] [file2] ...");
+        }
+        for (String name : args) {
+            if (name.endsWith(".gz")) {
+                Utilities.exit("ERROR: input filename ends in \".gz\"");
+            }
+            File inFile = new File(name);
+            File outFile = new File(name + ".gz");
+            try (InputStream is = new FileInputStream(inFile)) {
+                try (FileOutputStream fos = new FileOutputStream(outFile);
+                        BufferedOutputStream bos = new BufferedOutputStream(fos);
+                        BGZIPOutputStream os = new BGZIPOutputStream(bos, true)) {
+                    byte[] ba = new byte[2000];
+                    int len = is.read(ba);
+                    while (len != -1) {
+                        os.write(ba, 0, len);
+                        len = is.read(ba);
+                    }
+                }
+            }
+        }
+    }
+
+    /**
+     * Creates a new {@code BGZIPOutputStream} instance that writes
+     * to the specified output stream.
+     *
+     * @param os the output stream
+     * @param writeEmptyBlock {@code true} if the {@code close()} method will
+     * write an empty BGZIP block to the end of the stream
+     *
+     * @throws NullPointerException if {@code os == null}
+     */
+    public BGZIPOutputStream(OutputStream os, boolean writeEmptyBlock) {
+        if (os==null) {
+            throw new NullPointerException(OutputStream.class.toString());
+        }
+        this.writeEmptyBlock = writeEmptyBlock;
+        this.gzipDef = new Deflater(Deflater.DEFAULT_COMPRESSION, USE_GZIP);
+        this.os = os;
+        this.crc = new CRC32();
+        assert crc.getValue()==0;
+    }
+
+    private void compressAndFlushBuffer() throws IOException {
+        crc.update(input, 0, iSize);
+        int crc32 = (int) crc.getValue();
+
+        gzipDef.setInput(input, 0, iSize);
+        gzipDef.finish();
+        int len = gzipDef.deflate(output, 0, output.length, Deflater.SYNC_FLUSH);
+        if (len > MAX_OUTPUT_BYTES) {
+            len = setOutputNoCompression();
+        }
+        writeBgzipBlock(iSize, crc32, output, len, os);
+        gzipDef.reset();
+        crc.reset();
+        iSize = 0;
+    }
+
+    /* Returns the number of bytes written to the output array */
+    private int setOutputNoCompression() {
+        output[0] = 1;
+        output[1] = (byte) (iSize & 0xff);
+        output[2] = (byte) ((iSize >> 8) & 0xff);
+        output[3] = (byte) (~output[1]);
+        output[4] = (byte) (~output[2]);
+        System.arraycopy(input, 0, output, 5, iSize);
+        return iSize + 5;
+    }
+
+    private static void writeBgzipBlock(int iSize, int crc32,
+            byte[] out, int outLength, OutputStream os) throws IOException {
+        if (iSize > (1<<16)) {
+            throw new IllegalArgumentException(String.valueOf(iSize));
+        }
+        writeBGZIPHeader(outLength, os);
+        os.write(out, 0, outLength);
+        writeInt(crc32, os);
+        writeInt(iSize, os);
+    }
+
+    private static void writeBGZIPHeader(int nCompressedBytes, OutputStream os)
+            throws IOException {
+        int bsize = nCompressedBytes + 25;
+        if ((bsize >> 16) != 0) {
+            throw new IllegalArgumentException(String.valueOf(nCompressedBytes));
+        }
+        byte[] ba = new byte[]
+            {   31, (byte) 139,     // GZIP_MAGIC
+                Deflater.DEFLATED,  // CM
+                4,                  // FLG
+                0, 0, 0, 0,         // MTIME
+                0,                  // XFL
+                (byte) 255,         // OS
+                6, 0,               // XLEN
+                66, 67,             // BGZIP_SUBFIELD_MAGIC
+                2, 0,               // SLEN  (Subfield LENgth)
+                (byte)(bsize & 0xff),
+                (byte)((bsize >> 8) & 0xff) // BGZIP block size - 1
+            };
+        os.write(ba);
+    }
+
+    private static void writeInt(int i, OutputStream os) throws IOException {
+        os.write((byte)(i & 0xff));
+        os.write((byte)((i >> 8) & 0xff));
+        os.write((byte)((i >> 16) & 0xff));
+        os.write((byte)((i >> 24) & 0xff));
+    }
+
+    @Override
+    public void write(int b) throws IOException {
+        input[iSize++] = (byte) b;
+        if (iSize==MAX_INPUT_BYTES) {
+            compressAndFlushBuffer();
+        }
+    }
+
+    @Override
+    public void write(byte[] ba) throws IOException {
+        write(ba, 0, ba.length);
+    }
+
+    @Override
+    public void write(byte[] buf, int off, int len)
+            throws IOException {
+        int availSize = input.length - iSize;
+        while ((len - off) >= availSize) {
+            System.arraycopy(buf, off, this.input, iSize, availSize);
+            iSize += availSize;
+            off += availSize;
+            len -= availSize;
+            assert iSize==MAX_INPUT_BYTES;
+            compressAndFlushBuffer();
+            assert iSize==0;
+            availSize = this.input.length - iSize;
+        }
+        if (len>0) {
+            System.arraycopy(buf, off, this.input, iSize, len);
+            iSize += len;
+        }
+    }
+
+    @Override
+    public void flush() throws IOException {
+        compressAndFlushBuffer();
+        os.flush();
+    }
+
+    @Override
+    public void close() throws IOException {
+        if (iSize > 0) {
+            compressAndFlushBuffer();
+        }
+        if (writeEmptyBlock) {
+            compressAndFlushBuffer();
+        }
+        os.close();
+    }
+
+}
diff --git a/blbutil/FileUtil.java b/blbutil/FileUtil.java
index 8457dfc..c9b5657 100644
--- a/blbutil/FileUtil.java
+++ b/blbutil/FileUtil.java
@@ -18,21 +18,16 @@
  */
 package blbutil;
 
-import java.io.BufferedInputStream;
 import java.io.BufferedOutputStream;
 import java.io.BufferedWriter;
-import java.io.DataInputStream;
-import java.io.DataOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
 import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.FileWriter;
 import java.io.IOException;
-import java.io.OutputStream;
 import java.io.PrintWriter;
 import java.util.zip.GZIPOutputStream;
-import net.sf.samtools.util.BlockCompressedOutputStream;
 
 /**
  * Class {@code FileUtil} contains static methods for working with files.
@@ -46,45 +41,65 @@ public class FileUtil {
     }
 
     /**
-     * Returns a buffered {@code java.io.DataInputStream} reading from the
+     * Returns an unbuffered {@code java.io.FileInputStream} reading from the
      * specified file.  If the input stream cannot be opened, an error message
      * will be printed and the java interpreter will exit.
      * @param file a file
-     * @return a buffered {@code java.io.DataInputStream} reading from the
-     * specified file
+     * @return an unbuffered {@code java.io.FileInputStream}
      * @throws NullPointerException if {@code file == null}
      */
-    public static DataInputStream dataInputStream(File file) {
-        DataInputStream dis = null;
+    public static FileInputStream fileInputStream(File file) {
+        FileInputStream fis = null;
         try {
-            dis = new DataInputStream(new BufferedInputStream(
-                    new FileInputStream(file)));
+            fis = new FileInputStream(file);
         } catch (FileNotFoundException e) {
             Utilities.exit("Error opening " + file, e);
         }
-        return dis;
+        return fis;
     }
 
     /**
-     * Returns a buffered {@code java.io.DataOutputStream} writing to
-     * the specified file.  Any existing file corresponding to the
-     * {@code File} object will be deleted.   If the file cannot be opened,
-     * an error message will be printed and the java interpreter will exit.
+     * Returns an unbuffered {@code java.io.FileOutputStream}. If the file
+     * cannot be opened for writing, an error message will be printed and the
+     * Java Virtual Machine will exit.  If the specified file exists, bytes
+     * written by the returned {@code FileOutputSream} will overwrite the
+     * previously existing file.
+     *
      * @param file a file
-     * @return a buffered {@code java.io.DataOutputStream} writing to
-     * the specified file
+     * @return an unbuffered {@code java.io.PrintWriter}
      * @throws NullPointerException if {@code file == null}
      */
-    public static DataOutputStream dataOutputStream(File file) {
-        OutputStream dos = null;
+    public static FileOutputStream fileOutputStream(File file) {
+        FileOutputStream fos = null;
         try {
-            dos = new FileOutputStream(file);
+            fos = new FileOutputStream(file);
         } catch (FileNotFoundException e) {
             Utilities.exit("Error opening " + file, e);
         }
-        DataOutputStream out = new DataOutputStream(
-                new BufferedOutputStream(dos));
-        return out;
+        return fos;
+    }
+
+    /**
+     * Returns an unbuffered {@code java.io.FileOutputStream}. If the file
+     * cannot be opened for writing, an error message will be printed and the
+     * Java Virtual Machine will exit.  If the specified file exists and
+     * {@code append} is {@code false}, bytes written by the returned
+     * {@code PrintWriter} will overwrite the previously existing file.
+     *
+     * @param file a file
+     * @param append {@code true} if bytes will be appended to the end of
+     * the file
+     * @return an unbuffered {@code java.io.PrintWriter}
+     * @throws NullPointerException if {@code file == null}
+     */
+    public static FileOutputStream fileOutputStream(File file, boolean append) {
+        FileOutputStream fos = null;
+        try {
+            fos = new FileOutputStream(file, append);
+        } catch (FileNotFoundException e) {
+            Utilities.exit("Error opening " + file, e);
+        }
+        return fos;
     }
 
     /**
@@ -102,10 +117,10 @@ public class FileUtil {
     /**
      * Returns a buffered {@code java.io.PrintWriter} writing to
      * the specified file.  The resulting file will be compressed using
-     * the GZIP compression algorithm.  Any existing file corresponding
-     * to the specified file will be deleted.  If the file
-     * cannot be opened, an error message will be printed and the
-     * java interpreter will exit.
+     * the GZIP compression algorithm.  If the file cannot be opened, an
+     * error message will be printed and the java interpreter will exit.
+     * If the specified file exists, bytes written by the returned
+     * {@code PrintWriter} will overwrite the previously existing file.
      * @param file a file
      * @return a {@code java.io.PrintWriter} writing to the specified file
      * @throws NullPointerException if {@code file == null}
@@ -113,8 +128,10 @@ public class FileUtil {
     public static PrintWriter gzipPrintWriter(File file) {
         PrintWriter out = null;
         try {
-            out = new PrintWriter(
-                    new GZIPOutputStream(new FileOutputStream(file)));
+            FileOutputStream fos = new FileOutputStream(file);
+            BufferedOutputStream bos = new BufferedOutputStream(fos);
+            GZIPOutputStream gzos = new GZIPOutputStream(bos);
+            out = new PrintWriter(gzos);
         } catch (IOException e) {
             Utilities.exit("Error opening " + file, e);
         }
@@ -122,36 +139,56 @@ public class FileUtil {
     }
 
     /**
-     * Returns a buffered {@code java.io.PrintWriter} writing to
-     * the specified file.  The resulting file will be compressed using
-     * the BGZIP compression algorithm.  Any existing file corresponding
-     * to the specified file will be deleted.  If the file
-     * cannot be opened, an error message will be printed and the
-     * java interpreter will exit.
+     * Returns a buffered {@code java.io.PrintWriter} that compresses
+     * data using the BGZIP algorithm and writes the compressed data
+     * to the specified file. The {@code close()} method of the returned
+     * {@code PrintWriter} will write an empty BGZIP block to the end of the
+     * output stream. If the file cannot be opened for writing, an error
+     * message will be printed and the Java Virtual Machine will exit.
+     * If the specified file exists, bytes written by the returned
+     * {@code PrintWriter} will overwrite the previously existing file.
      *
      * @param file a file
-     * @return a buffered {@code java.io.PrintWriter} writing to
-     * the specified file
+     * @return a buffered {@code java.io.PrintWriter}
      * @throws NullPointerException if {@code file == null}
      */
     public static PrintWriter bgzipPrintWriter(File file) {
-        PrintWriter out = null;
-        try {
-            OutputStream fout = new FileOutputStream(file);
-            out = new PrintWriter(new BlockCompressedOutputStream(
-                    new BufferedOutputStream(fout), file));
-        } catch (FileNotFoundException e) {
-            Utilities.exit("Error opening " + file, e);
-        }
-        return out;
+        boolean writeBuffer = true;
+        FileOutputStream fos = fileOutputStream(file);
+        BufferedOutputStream bos = new BufferedOutputStream(fos);
+        return new PrintWriter(new BGZIPOutputStream(bos, writeBuffer));
     }
 
     /**
-     * Returns a buffered {@code java.io.PrintWriter} writing to
-     * the specified file.  Any existing file corresponding
-     * to the specified filename will be deleted.  If the file
-     * cannot be opened, an error message will be printed and the
-     * java interpreter will exit.
+     * Returns a buffered {@code java.io.PrintWriter} that compresses
+     * data using the BGZIP algorithm and writes the compressed data to
+     * the specified file. The {@code close()} method of the returned
+     * {@code PrintWriter} will write an empty BGZIP block to the end of the
+     * output stream. If the file cannot be opened for writing, an error
+     * message will be printed and the Java Virtual Machine will exit.
+     * If the specified file exists and {@code append} is {@code false}, bytes
+     * written by the returned {@code PrintWriter} will overwrite the
+     * previously existing file.
+     *
+     * @param file a file
+     * @param append {@code true} if bytes will be appended to the end of
+     * the file
+     * @return a buffered {@code java.io.PrintWriter}
+     * @throws NullPointerException if {@code file == null}
+     */
+    public static PrintWriter bgzipPrintWriter(File file, boolean append) {
+        boolean writeBuffer = true;
+        FileOutputStream fos = fileOutputStream(file, append);
+        BufferedOutputStream bos = new BufferedOutputStream(fos);
+        return new PrintWriter(new BGZIPOutputStream(bos, writeBuffer));
+    }
+
+    /**
+     * Returns a buffered {@code java.io.PrintWriter} writing to the
+     * specified file.  If the file cannot be opened, an error message
+     * will be printed and the Java Virtual Machine will exit.  If the specified
+     * file exists, bytes written by  the returned {@code PrintWriter} will
+     * overwrite the previously existing file.
      * @param file a file
      * @return a buffered {@code java.io.PrintWriter} writing to
      * the specified file
@@ -163,10 +200,10 @@ public class FileUtil {
 
     /**
      * Returns a buffered {@code java.io.PrintWriter} writing to
-     * the specified file. If {@code append == false}
-     * any existing file corresponding to the specified file will be deleted.
-     * If the file cannot be opened, an error message will be printed and the
-     * java interpreter will exit.
+     * the specified file. If the file cannot be opened, an error message will
+     * be printed and the Java Virtual Machine will exit.  If the specified
+     * file exists and {@code append} is {@code false}, bytes written by the
+     * returned {@code PrintWriter} will overwrite the previously existing file.
      *
      * @param file a file
      * @param append {@code true} if the data will be appended
@@ -187,11 +224,11 @@ public class FileUtil {
     }
 
     /**
-     * Returns a non-buffered {@code java.io.PrintWriter} writing to
-     * the specified file.
-     * If {@code append == false} any existing file corresponding
-     * to the specified file will be deleted. If the file cannot be opened,
-     * an error message will be printed and the java interpreter will exit.
+     * Returns an unbuffered {@code java.io.PrintWriter} writing to
+     * the specified file. If the file cannot be opened, an error message will
+     * be printed and the Java Virtual Machine will exit.  If the specified
+     * file exists and {@code append} is {@code false}, bytes written by the
+     * returned {@code PrintWriter} will overwrite the previously existing file.
      *
      * @param file a file
      * @param append {@code true} if the data will be appended
diff --git a/blbutil/InputIt.java b/blbutil/InputIt.java
index eb34f31..6792749 100644
--- a/blbutil/InputIt.java
+++ b/blbutil/InputIt.java
@@ -208,7 +208,7 @@ public class InputIt implements FileIt<String> {
             if (file.getName().endsWith(".gz")) {
                 if (isBGZipFile(file)) {
                     return new InputIt(
-                            new BlockCompressedInputStream(is), file);
+		            new BlockCompressedInputStream(is), file);
                 }
                 else {
                     return new InputIt(new GZIPInputStream(is), file);
@@ -248,11 +248,12 @@ public class InputIt implements FileIt<String> {
             if (file.getName().endsWith(".gz")) {
                 if (isBGZipFile(file)) {
                     return new InputIt(
-                            new BlockCompressedInputStream(is), file, bufferSize);
+		            new BlockCompressedInputStream(is), file, bufferSize);
                 }
                 else {
                     return new InputIt(new GZIPInputStream(is), file, bufferSize);
                 }
+
             }
             else {
                 return new InputIt(is, file);
@@ -270,9 +271,9 @@ public class InputIt implements FileIt<String> {
 
     private static boolean isBGZipFile(File file) throws IOException {
         try (InputStream is=new BufferedInputStream(new FileInputStream(file))) {
-            return BlockCompressedInputStream.isValidFile(is);
-        }
-     }
+		return BlockCompressedInputStream.isValidFile(is);
+	    }
+    }
 
      /**
      * Constructs and returns an {@code InputIt} instance with the default
diff --git a/main/CurrentData.java b/main/CurrentData.java
index d6fbf3f..f818aea 100644
--- a/main/CurrentData.java
+++ b/main/CurrentData.java
@@ -44,11 +44,11 @@ public class CurrentData {
 
     private final int window;
     private final SampleHapPairs initHaps;
-    private final int prevSplice;
-    private final int nextOverlap;
-    private final int nextSplice;
-    private final int nextTargetSplice;
-    private final int nextTargetOverlap;
+    private final int prevSpliceStart;
+    private final int nextOverlapStart;
+    private final int nextSpliceStart;
+    private final int nextTargetSpliceStart;
+    private final int nextTargetOverlapStart;
 
     private final GL targetGL;
     private final NuclearFamilies families;
@@ -104,11 +104,11 @@ public class CurrentData {
         }
         this.window = data.window();
         this.initHaps = overlapHaps;
-        this.prevSplice = data.overlap()/2;
-        this.nextOverlap = nextOverlap(data, par.overlap());
-        this.nextSplice = nextSplice(data, par.overlap());
-        this.nextTargetOverlap = targetIndex(data, nextOverlap);
-        this.nextTargetSplice = targetIndex(data, nextSplice);
+        this.prevSpliceStart = data.overlap()/2;
+        this.nextOverlapStart = CurrentData.this.nextOverlapStart(data, par.overlap());
+        this.nextSpliceStart = (data.nMarkers() + nextOverlapStart)/2;
+        this.nextTargetOverlapStart = targetIndex(data, nextOverlapStart);
+        this.nextTargetSpliceStart = targetIndex(data, nextSpliceStart);
 
         this.families = families;
         this.weights = new Weights(families);
@@ -130,18 +130,26 @@ public class CurrentData {
         this.recombRate = recombRate(targetMarkers, genMap, par.mapscale());
     }
 
-    /* returns the first index in the next overlap */
-    private static int nextOverlap(Data data, int overlap) {
-        if (data.canAdvanceWindow() && data.lastWindowOnChrom()==false) {
-            return data.nMarkers() - overlap;
+    /* Returns the index of the first marker in the overlap */
+    private static int nextOverlapStart(Data data, int targetOverlap) {
+        if (targetOverlap < 0) {
+            throw new IllegalArgumentException(String.valueOf(targetOverlap));
         }
-        else {
+        if (targetOverlap==0 || data.lastWindowOnChrom()) {
             return data.nMarkers();
         }
+        Markers markers = data.markers();
+        int nextOverlap = Math.max(0, data.nMarkers() - targetOverlap);
+        while (nextOverlap>0
+                && markers.marker(nextOverlap).pos()
+                    == markers.marker(nextOverlap - 1).pos()) {
+            --nextOverlap;
+        }
+        return nextOverlap;
     }
 
-    /* returns the first index after the next splice point */
-    private static int nextSplice(Data data, int overlap) {
+    /* Returns the index of the first marker after the next splice point */
+    private static int nextSpliceStart(Data data, int overlap) {
         if (data.canAdvanceWindow() && data.lastWindowOnChrom()==false) {
             return data.nMarkers() - overlap + (overlap/2);
         }
@@ -188,25 +196,37 @@ public class CurrentData {
     }
 
     /**
-     * Returns the first marker index after the splice point with
-     * the previous marker window. Returns 0 if the current marker window
-     * is the first marker window.
-     * @return the first marker index after the splice point with
-     * the previous marker window
+     * Returns the first marker index in the overlap between this
+     * marker window and the next marker window, or
+     * returns {@code this.nMarkers()} there is no overlap.
+     * @return the first marker index in the overlap between this
+     * marker window and the next marker window
      */
-    public int prevSplice() {
-        return prevSplice;
+    public int nextOverlapStart() {
+        return nextOverlapStart;
     }
 
     /**
-     * Returns the first marker index in the overlap between this
+     * Returns the first target marker index in the overlap between this
      * marker window and the next marker window, or
-     * returns {@code this.nMarkers()} there is no overlap.
-     * @return the first marker index in the overlap between this
+     * returns {@code this.nMarkers()} if there is no overlap or if there are
+     * no target markers in the overlap.
+     * @return the first target marker index in the overlap between this
      * marker window and the next marker window
      */
-    public int nextOverlap() {
-        return nextOverlap;
+    public int nextTargetOverlapStart() {
+        return nextTargetOverlapStart;
+    }
+
+    /**
+     * Returns the first marker index after the splice point with
+     * the previous marker window. Returns 0 if the current marker window
+     * is the first marker window.
+     * @return the first marker index after the splice point with
+     * the previous marker window
+     */
+    public int prevSpliceStart() {
+        return prevSpliceStart;
     }
 
     /**
@@ -216,8 +236,8 @@ public class CurrentData {
      * no markers after the splice point.
      * @return the first marker index after the next splice point
      */
-    public int nextSplice() {
-        return nextSplice;
+    public int nextSpliceStart() {
+        return nextSpliceStart;
     }
 
     /**
@@ -227,31 +247,19 @@ public class CurrentData {
      * @return the first target marker index after the splice point with
      * the previous marker window
      */
-    public int prevTargetSplice() {
+    public int prevTargetSpliceStart() {
         return initHaps==null ? 0 : initHaps.nMarkers();
     }
 
     /**
-     * Returns the first target marker index in the overlap between this
-     * marker window and the next marker window, or
-     * returns {@code this.nMarkers()} if there is no overlap or if there are
-     * no target markers in the overlap.
-     * @return the first target marker index in the overlap between this
-     * marker window and the next marker window
-     */
-    public int nextTargetOverlap() {
-        return nextTargetOverlap;
-    }
-
-    /**
      * Returns the first target marker index after the splice point between this
      * marker window and the next marker window, or returns
      * {@code this.nTargetMarkers()} if there is no overlap or if there are
      * no target markers after the splice point
      * @return the first target marker index after the next splice point
      */
-    public int nextTargetSplice() {
-        return nextTargetSplice;
+    public int nextTargetSpliceStart() {
+        return nextTargetSpliceStart;
     }
 
     /**
diff --git a/main/Main.java b/main/Main.java
index c072110..79b3dfc 100644
--- a/main/Main.java
+++ b/main/Main.java
@@ -65,8 +65,8 @@ public class Main {
     /**
      * The program name and version.
      */
-    public static final String program = "beagle.03May16.862.jar (version 4.1)";
-    public static final String command = "java -jar beagle.03May16.862.jar";
+    public static final String program = "beagle.16Jun16.7e4.jar (version 4.1)";
+    public static final String command = "java -jar beagle.16Jun16.7e4.jar";
 
     /**
      * The copyright string.
@@ -78,7 +78,7 @@ public class Main {
      */
     public static final String shortHelp = Main.program
             + Const.nl + Main.copyright
-            + Const.nl + "Enter \"java -jar beagle.03May16.862.jar\" for a "
+            + Const.nl + "Enter \"java -jar beagle.16Jun16.7e4.jar\" for a "
             + "summary of command line " + "arguments.";
 
     private final Par par;
@@ -108,14 +108,13 @@ public class Main {
         runStats.printStartInfo();
         GeneticMap genMap = geneticMap(par);
 
-        try (Data data = (par.ref()==null) ? nonRefData(par) : allData(par)) {
-            try (WindowWriter winOut = new WindowWriter(data.targetSamples(),
-                    par.out())) {
-                Main main = new Main(par, data, genMap, winOut, runStats);
-                main.phaseData();
-                runStats.printSummaryAndClose(data.nTargetMarkersSoFar(),
-                        data.nMarkersSoFar());
-            }
+        try (Data data = (par.ref()==null) ? nonRefData(par) : allData(par);
+                WindowWriter winOut = new WindowWriter(
+                        data.targetSamples(), par.out())) {
+            Main main = new Main(par, data, genMap, winOut, runStats);
+            main.phaseData();
+            runStats.printSummaryAndClose(data.nTargetMarkersSoFar(),
+                    data.nMarkersSoFar());
         }
     }
 
@@ -141,8 +140,9 @@ public class Main {
         runStats.printSampleSummary(fam, data);
         MainHelper mh = new MainHelper(par, genMap, runStats);
         SampleHapPairs overlapHaps = null;
+        int overlap = 0;
         while (data.canAdvanceWindow()) {
-            advanceWindow();
+            advanceWindow(overlap, par.window());
             CurrentData cd = new CurrentData(par, genMap, data, overlapHaps, fam);
             GenotypeValues gv = gv(par, cd);
             SampleHapPairs targetHapPairs = mh.phase(cd, gv);
@@ -157,6 +157,7 @@ public class Main {
                 printOutput(cd, targetHapPairs, alProbs, ibd);
             }
             overlapHaps = overlapHaps(cd, targetHapPairs);
+            overlap = cd.nMarkers() - cd.nextOverlapStart();
         }
     }
 
@@ -194,17 +195,40 @@ public class Main {
     private void printOutput(CurrentData cd, SampleHapPairs targetHapPairs,
             AlleleProbs alProbs, Map<IntPair, List<IbdSegment>> ibd) {
         assert par.gt()!=null;
-        windowOut.print(cd, targetHapPairs, alProbs, par.gprobs());
+        boolean markersAreImputed = cd.nTargetMarkers() < cd.nMarkers();
+        boolean[] isImputed = isImputed(cd);
+        int start = cd.prevSpliceStart();
+        int end = cd.nextSpliceStart();
+        boolean dose = markersAreImputed;
+        boolean gprobs = markersAreImputed && par.gprobs();
+        int nThreads = par.nthreads();
+        if (markersAreImputed){
+            alProbs = new ConstrainedAlleleProbs(targetHapPairs, alProbs,
+                    cd.targetMarkerIndices());
+        }
+        windowOut.print(alProbs, isImputed, start, end, dose, gprobs, nThreads);
         if (par.ibd()) {
             windowOut.printIbd(cd, ibd);
         }
     }
 
+    private static boolean[] isImputed(CurrentData cd) {
+        boolean[] ba = new boolean[cd.nMarkers()];
+        if (cd.nTargetMarkers()<ba.length) {
+            for (int j=0; j<ba.length; ++j) {
+                if (cd.targetMarkerIndex(j) == -1) {
+                    ba[j] = true;
+                }
+            }
+        }
+        return ba;
+    }
+
     private SampleHapPairs overlapHaps(CurrentData cd,
             SampleHapPairs targetHapPairs) {
-        int nextOverlap = cd.nextTargetOverlap();
-        int nextSplice = cd.nextTargetSplice();
-        if (cd.nextOverlap() == cd.nextSplice()) {
+        int nextOverlap = cd.nextTargetOverlapStart();
+        int nextSplice = cd.nextTargetSpliceStart();
+        if (cd.nextOverlapStart() == cd.nextSpliceStart()) {
             return null;
         }
         int nSamples = targetHapPairs.nSamples();
@@ -434,9 +458,9 @@ public class Main {
         }
     }
 
-    private void advanceWindow() {
+    private void advanceWindow(int overlap, int window) {
         if (data.canAdvanceWindow()) {
-            data.advanceWindow(par.overlap(), par.window());
+            data.advanceWindow(overlap, window);
         }
         runStats.printWindowUpdate(data);
     }
diff --git a/main/MainHelper.java b/main/MainHelper.java
index 57e0095..dd68eb4 100644
--- a/main/MainHelper.java
+++ b/main/MainHelper.java
@@ -162,8 +162,8 @@ public class MainHelper {
     }
 
     private List<HapPair> correctGenotypes(CurrentData cd, List<HapPair> hapPairs) {
-        int start = cd.prevTargetSplice();
-        int end = cd.nextTargetSplice();
+        int start = cd.prevTargetSpliceStart();
+        int end = cd.nextTargetSpliceStart();
         GL modGL = new MaskedEndsGL(cd.targetGL(), start, end);
 //        File outFile = new File(par.out() + ".gterr");
 //        boolean append = cd.window() > 1;
diff --git a/main/WindowWriter.java b/main/WindowWriter.java
index ee0d794..2a7d6a8 100644
--- a/main/WindowWriter.java
+++ b/main/WindowWriter.java
@@ -19,19 +19,30 @@
 package main;
 
 import beagleutil.Samples;
+import blbutil.BGZIPOutputStream;
 import blbutil.Const;
 import blbutil.FileUtil;
 import blbutil.IntPair;
-import haplotype.SampleHapPairs;
+import blbutil.Utilities;
 import ibd.IbdSegment;
+import java.io.BufferedOutputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.Closeable;
 import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
 import java.io.PrintWriter;
 import java.text.DecimalFormat;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
 import vcf.VcfWriter;
 
 /**
@@ -46,14 +57,13 @@ public class WindowWriter implements Closeable {
 
     private static final DecimalFormat df2 = new DecimalFormat("#.##");
 
-    private boolean isClosed = false;
     private boolean appendIbd = false;
 
     private final Samples samples;
+    private final String outPrefix;
     private final File vcfOutFile;
     private final File ibdOutFile;
     private final File hbdOutFile;
-    private final PrintWriter vcfOut;
     private final Map<IntPair, IbdSegment> ibdBuffer = new HashMap<>();
 
     /**
@@ -73,47 +83,56 @@ public class WindowWriter implements Closeable {
             throw new IllegalArgumentException("outPrefix.length()==0");
         }
         this.samples = samples;
+        this.outPrefix = outPrefix;
         this.vcfOutFile = new File(outPrefix + ".vcf.gz");
         this.ibdOutFile = new File(outPrefix + ".ibd");
         this.hbdOutFile = new File(outPrefix + ".hbd");
-        this.vcfOut = FileUtil.bgzipPrintWriter(vcfOutFile);
 
-        boolean printGT = true;
-        boolean printGP = true;
-        boolean printGL = false;
-        VcfWriter.writeMetaLines(samples.ids(), Main.program,
-                printGT, printGP, printGL, vcfOut);
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        try (PrintWriter vcfOut=new PrintWriter(
+                new BGZIPOutputStream(baos, false))) {
+            boolean printGT = true;
+            boolean printGP = true;
+            boolean printGL = false;
+            VcfWriter.writeMetaLines(samples.ids(), Main.program,
+                    printGT, printGP, printGL, vcfOut);
+        }
+        boolean append = false;
+        writeBytesToFile(baos.toByteArray(), vcfOutFile, append);
     }
 
     /**
-     * Returns the samples whose data is written by {@code this}.
-     * @return the samples whose data is written by {@code this}
+     * Returns the output file prefix.
+     * @return the output file prefix
      */
-    public Samples samples() {
-        return samples;
+    public String outPrefix() {
+        return outPrefix;
     }
 
+    private static void write(byte[] ba, OutputStream out) {
+        try {
+            out.write(ba);
+        } catch (IOException e) {
+            Utilities.exit("Error writing byte", e);
+        }
+    }
 
-    /**
-     * Returns {@code true} if {@code this.close()} method has
-     * been previously invoked and returns {@code false} otherwise.
-     *
-     * @return {@code true} if {@code this.close()} method has
-     * been previously invoked
-     */
-    public boolean isClosed() {
-        return isClosed;
+    private static void writeBytesToFile(byte[] ba, File file, boolean append) {
+        try {
+            try (FileOutputStream fos=new FileOutputStream(file, append)) {
+                fos.write(ba);
+            }
+        } catch (IOException e) {
+            Utilities.exit("Error writing to file: " + file, e);
+        }
     }
 
     /**
-     * Closes this {@code WindowWriter} for writing.  Calling the
-     * {@code print()} method after invoking {@code close()} will
-     * throw an {@code IllegalStateException}.
+     * Returns the samples whose data is written by {@code this}.
+     * @return the samples whose data is written by {@code this}
      */
-    @Override
-    public void close() {
-        vcfOut.close();
-        isClosed = true;
+    public Samples samples() {
+        return samples;
     }
 
     /**
@@ -127,58 +146,116 @@ public class WindowWriter implements Closeable {
      * @throws NullPointerException if {@code cd == null || gv == null}
      */
     public void printGV(CurrentData cd, GenotypeValues gv) {
-        if (isClosed) {
-            throw new IllegalStateException("isClosed()==true");
+        boolean append = true;
+        try (PrintWriter vcfOut = FileUtil.bgzipPrintWriter(vcfOutFile, append)) {
+            VcfWriter.appendRecords(gv, cd.prevTargetSpliceStart(),
+                    cd.nextTargetSpliceStart(), vcfOut);
         }
-        VcfWriter.appendRecords(gv, cd.prevTargetSplice(),
-                cd.nextTargetSplice(), vcfOut);
-        vcfOut.flush();
     }
 
     /**
      * Prints the data in {@code alProbs} for markers
      * with index between {@code cd.lastSplice()} (inclusive) and
-     * {@code cd.nextSplice()} (exclusive).
+     * {@code cd.nextSplice()} (exclusive) to the output
+     * VCF file: {@code this.outPrefix() + ".vcf.gz"}.
      *
-     * @param cd the input data for the current marker window
-     * @param targetHapPairs the target haplotype pairs
      * @param alProbs the estimated haplotype allele probabilities
-     * @param gprobs {@code true} if the GP field should be printed when
-     * imputed markers are present, and {@code false} otherwise
+     * @param isImputed an array of length {@code alProbs.nMarkers()}
+     * whose {@code j}-th element is {@code true} if the corresponding
+     * marker is imputed, and {@code false} otherwise
+     * @param start the starting marker index (inclusive)
+     * @param end the ending marker index (exclusive)
+     * @param dose {@code true} if the output FORMAT fields should contain
+     * a DS subfield, and {@code false} otherwise
+     * @param gprobs {@code true} if the output FORMAT fields should contain
+     * a GP subfield, and {@code false} otherwise
+     * @param nThreads the number of parallel threads to use
      *
-     * @throws IllegalStateException if {@code this.isClosed() == true}
-     * @throws IllegalArgumentException if
-     * {@code this.samples().equals(cd.targetSamples()) == false}
-     * @throws IllegalArgumentException if
-     * {@code this.samples().equals(alProbs.samples()) == false}
      * @throws IllegalArgumentException if
-     * {@code cd.markers().equals(alProbs.markers()) == false}
+     * {@code isImputed.length != alProbs.nMarkers()}
+     * @throws IllegalArgumentException if {@code nThreads < 1}
+     * @throws IndexOutOfBoundsException if
+     * {@code start < 0 || end > alProbs.nMarkers() || start > end}
      * @throws NullPointerException if
-     * {@code cd == null || targetHapPairs == null || alProbs == null}
+     * {@code alProbs == null || isImputed == null}
      */
-    public void print(CurrentData cd, SampleHapPairs targetHapPairs,
-            AlleleProbs alProbs, boolean gprobs) {
-        if (isClosed) {
-            throw new IllegalStateException("isClosed()==true");
+    public void print(AlleleProbs alProbs, boolean[] isImputed,
+            int start, int end, boolean dose, boolean gprobs, int nThreads) {
+        int step = nMarkersPerStep(alProbs.nSamples(), dose, gprobs);
+        int nSteps = nSteps(end-start, step);
+        final AtomicInteger atomicInt = new AtomicInteger(0);
+        final ConcurrentHashMap<Integer, byte[]> map = new ConcurrentHashMap<>();
+        ExecutorService es = Executors.newFixedThreadPool(nThreads);
+        for (int j=0; j<nThreads; ++j) {
+            es.submit(
+                () -> {
+                    try {
+                        int index = atomicInt.getAndIncrement();
+                        while (index < nSteps) {
+                            int segStart = start + step*index;
+                            int segEnd = Math.min(segStart + step, end);
+                            ByteArrayOutputStream baos = new ByteArrayOutputStream();
+                            try (PrintWriter vcfOut=new PrintWriter(
+                                    new BGZIPOutputStream(baos, false))) {
+                                VcfWriter.appendRecords(alProbs, isImputed,
+                                        segStart, segEnd, dose, gprobs, vcfOut);
+                            }
+                            map.put(index, baos.toByteArray());
+                            index = atomicInt.getAndIncrement();
+                        }
+                    }
+                    catch (Exception ex) {
+                        Utilities.exit("", ex);
+                    }
+                }
+            ) ;
         }
-        if (cd.markers().equals(alProbs.markers())==false) {
-            throw new IllegalArgumentException("inconsistent markers");
+        try {
+            es.shutdown();
+            es.awaitTermination(Long.MAX_VALUE, TimeUnit.DAYS);
         }
-        if (samples.equals(cd.targetSamples()) == false
-                || samples.equals(alProbs.samples()) == false) {
-            throw new IllegalArgumentException("inconsistent samples");
+        catch (Throwable e) {
+            Utilities.exit("ERROR", e);
+        }
+        print(map, vcfOutFile);
+    }
+
+    private static void print(ConcurrentHashMap<Integer, byte[]> map,
+            File outFile)  {
+        boolean append = true;
+        int index = 0;
+        try {
+            try (OutputStream fos = new BufferedOutputStream(
+                    new FileOutputStream(outFile, append))) {
+                byte[] bytes = map.get(index++);
+                while (bytes!=null) {
+                    write(bytes, fos);
+                    bytes = map.get(index++);
+                }
+            }
+        } catch (IOException e) {
+            Utilities.exit("Error writing to file: " + outFile, e);
+        }
+    }
+
+    private static int nMarkersPerStep(int nSamples, boolean dose, boolean gprobs) {
+        int nBytesPerStep = 65536*50;
+        int bytesPerSample = 4;
+        if (dose) {
+            bytesPerSample += 2;
         }
-        int start = cd.prevSplice();
-        int end = cd.nextSplice();
-        if (cd.nTargetMarkers() < cd.nMarkers()){
-            ConstrainedAlleleProbs constAlProbs = new ConstrainedAlleleProbs(
-                    targetHapPairs, alProbs, cd.targetMarkerIndices());
-            VcfWriter.appendRecords(constAlProbs, start, end, gprobs, vcfOut);
+        if (gprobs) {
+            bytesPerSample += 6;
         }
-        else {
-            VcfWriter.appendRecords(alProbs, start, end, vcfOut);
+        return nBytesPerStep / (nSamples*bytesPerSample);
+    }
+
+    private static int nSteps(int n, int step) {
+        int nSteps = n / step;
+        if (nSteps * step != n) {
+            ++nSteps;
         }
-        vcfOut.flush();
+        return nSteps;
     }
 
     /**
@@ -198,20 +275,16 @@ public class WindowWriter implements Closeable {
      * @param ibdMap a map whose keys are pairs of haplotype indices and whose
      * values are lists of IBD segments involving the haplotype pair key
      *
-     * @throws IllegalStateException if {@code this.isClosed()==true}
      * @throws IllegalArgumentException if
      * {@code this.samples().equals(cd.targetSamples()) == false}
      * @throws NullPointerException if {@code cd == null || ibdMap == null}
      */
     public void printIbd(CurrentData cd, Map<IntPair, List<IbdSegment>> ibdMap) {
-        if (isClosed) {
-            throw new IllegalStateException("isClosed()==true");
-        }
         if (samples.equals(cd.targetSamples()) == false) {
             throw new IllegalArgumentException("inconsistent samples");
         }
-        printIbd(ibdMap, cd.prevTargetSplice(), cd.nextTargetOverlap(),
-                cd.nextTargetSplice(), cd.nTargetMarkers());
+        printIbd(ibdMap, cd.prevTargetSpliceStart(), cd.nextTargetOverlapStart(),
+                cd.nextTargetSpliceStart(), cd.nTargetMarkers());
         if (appendIbd==false) {
             appendIbd = true;
         }
@@ -279,4 +352,18 @@ public class WindowWriter implements Closeable {
         out.print(Const.tab);
         out.println(df2.format(tract.score()));
     }
+
+    @Override
+    public void close() {
+        boolean append = true;
+        try {
+            try (FileOutputStream fos = new FileOutputStream(vcfOutFile, append);
+                    BufferedOutputStream bos = new BufferedOutputStream(fos);
+                    BGZIPOutputStream bgzip = new BGZIPOutputStream(bos, true)) {
+                // write empty BGZIP block to bgzip by closing bgzip
+            }
+        } catch (IOException e) {
+            Utilities.exit("Error closing file: " + vcfOutFile, e);
+        }
+    }
 }
diff --git a/sample/HaplotypeCoder.java b/sample/HaplotypeCoder.java
index 4ae3782..6a38f34 100644
--- a/sample/HaplotypeCoder.java
+++ b/sample/HaplotypeCoder.java
@@ -26,6 +26,7 @@ import blbutil.IntList;
 import blbutil.WrappedIntArray;
 import haplotype.SampleHapPairs;
 import java.util.Arrays;
+import java.util.stream.IntStream;
 
 /**
  * <p>Class {@code HaplotypeCoder} indexes the observed allele sequences
@@ -101,7 +102,7 @@ public class HaplotypeCoder {
             throw new IllegalArgumentException("start > end");
         }
         IntArray[] val = new IntArray[2];
-        int[] haps = initialHaps();
+        int[] haps = IntStream.range(0, nHaps).toArray();
         int[] alleles = new int[nHaps];
         IntList lastEnds = new IntList(1);
         lastEnds.add(nHaps);
@@ -129,14 +130,6 @@ public class HaplotypeCoder {
         return val;
     }
 
-    private int[] initialHaps() {
-        int[] ia = new int[nHaps];
-        for (int j = 0; j < ia.length; ++j) {
-            ia[j] = j;
-        }
-        return ia;
-    }
-
     private IntList partition(int marker, int[] alleles, int[] haps,
             IntList lastEnds) {
         IntList nextEnds = new IntList( (4*lastEnds.size())/3 + 1 );
@@ -163,10 +156,10 @@ public class HaplotypeCoder {
     }
 
     private void setAlleles(int marker, int[] alleles) {
-        for (int j = 0; j < nRefHaps; ++j) {
+        for (int j=0; j<nRefHaps; ++j) {
             alleles[j] = refHapPairs.allele(marker, j);
         }
-        for (int j = nRefHaps; j < nHaps; ++j) {
+        for (int j = nRefHaps; j<nHaps; ++j) {
             alleles[j] = targetHapPairs.allele(marker, j - nRefHaps);
         }
     }
diff --git a/sample/ImputationData.java b/sample/ImputationData.java
index d44b1d3..42847cb 100644
--- a/sample/ImputationData.java
+++ b/sample/ImputationData.java
@@ -71,23 +71,21 @@ public class ImputationData {
         if (cd.targetSamples().equals(targetHapPairs.samples())==false) {
             throw new IllegalArgumentException("inconsistent samples");
         }
-        int[] gtEnd = gtEnd(targetHapPairs.markers(), map, par.cluster());
-        int[] gtStart = gtStart(gtEnd);
-        this.refAlleles = new IntArray[gtStart.length];
-        this.targAlleles = new IntArray[gtStart.length];
+        int[] targClustEnd = targClustEnd(targetHapPairs.markers(), map, par.cluster());
+        this.refAlleles = new IntArray[targClustEnd.length];
+        this.targAlleles = new IntArray[targClustEnd.length];
         setCodedAlleles(cd.restrictedRefSampleHapPairs(), targetHapPairs,
-                gtStart, gtEnd, refAlleles, targAlleles);
-        this.nClusters = gtStart.length;
+                targClustEnd, refAlleles, targAlleles);
+        this.nClusters = targClustEnd.length;
         this.refHapPairs = cd.refSampleHapPairs();
-        this.refHapSegs = refHapSegs(refHapPairs, gtStart, gtEnd,
-                cd.markerIndices(), par.nthreads());
+        this.refHapSegs = refHapSegs(refHapPairs, targClustEnd, cd.markerIndices());
         this.targHapPairs = targetHapPairs;
-        this.errProb = err(par.err(), gtStart, gtEnd);
-        this.pRecomb = ImputationData.this.pRecomb(refHapSegs, map, par.ne());
+        this.errProb = err(par.err(), targClustEnd);
+        this.pRecomb = ImputationData.pRecomb(refHapSegs, map, par.ne());
         this.weight = wts(refHapSegs, map);
     }
 
-    private static int[] gtEnd(Markers targetMarkers, GeneticMap genMap,
+    private static int[] targClustEnd(Markers targetMarkers, GeneticMap genMap,
             float clusterDist) {
         int nMarkers = targetMarkers.nMarkers();
         int[] ends = new int[nMarkers];
@@ -104,33 +102,29 @@ public class ImputationData {
         return Arrays.copyOf(ends, index);
     }
 
-    private static int[] gtStart(int[] gtEnd) {
-        int[] gtStart = new int[gtEnd.length];
-        for (int j=1; j<gtStart.length; ++j) {
-            gtStart[j] = gtEnd[j - 1];
-        }
-        return gtStart;
-    }
-
     private static void setCodedAlleles(SampleHapPairs refHapPairs,
-            SampleHapPairs targetHapPairs, int[] gtStart,
-            int[] gtEnd, IntArray[] refAlleles, IntArray[] targAlleles) {
+            SampleHapPairs targetHapPairs, int[] targEnd,
+            IntArray[] refAlleles, IntArray[] targAlleles) {
         HaplotypeCoder coder = new HaplotypeCoder(refHapPairs, targetHapPairs);
-        for (int j=0; j<gtStart.length; ++j) {
-            IntArray[] ia = coder.run(gtStart[j], gtEnd[j]);
+        int start = 0;
+        for (int j=0; j<targEnd.length; ++j) {
+            IntArray[] ia = coder.run(start, targEnd[j]);
             refAlleles[j] = ia[0];
             targAlleles[j] = ia[1];
+            start = targEnd[j];
         }
     }
 
-    private static float[] err(float errRate, int[] gtStart, int[] gtEnd) {
+    private static float[] err(float errRate, int[] targClustEnd) {
         float maxErrProb = 0.5f;
-        float[] err = new float[gtStart.length];
+        float[] err = new float[targClustEnd.length];
+        int start = 0;
         for (int j=0; j<err.length; ++j) {
-            err[j] = errRate * (gtEnd[j] - gtStart[j]);
+            err[j] = errRate * (targClustEnd[j] - start);
             if (err[j] > maxErrProb) {
                 err[j] = maxErrProb;
             }
+            start = targClustEnd[j];
         }
         return err;
     }
@@ -146,10 +140,10 @@ public class ImputationData {
     }
 
     private static int[] midPos(Markers refMarkers, RefHapSegs refHapSegs) {
-        int[] midPos = new int[refHapSegs.nClusters()];
+        int[] midPos = new int[refHapSegs.nSegs() - 1];
         for (int j=0; j<midPos.length; ++j) {
-            int startPos = refMarkers.marker(refHapSegs.clusterStart(j)).pos();
-            int endPos = refMarkers.marker(refHapSegs.clusterEnd(j) - 1).pos();
+            int startPos = refMarkers.marker(refHapSegs.segStart(j+1)).pos();
+            int endPos = refMarkers.marker(refHapSegs.segEnd(j) - 1).pos();
             midPos[j] = (startPos + endPos) / 2;
         }
         return midPos;
@@ -174,15 +168,15 @@ public class ImputationData {
         Markers refMarkers = refHapSegs.refHapPairs().markers();
         double[] cumPos = cumPos(refMarkers, map);
         int nMarkers = refMarkers.nMarkers();
-        int nClusters = refHapSegs.nClusters();
+        int nClusters = refHapSegs.nSegs() - 1;
         float[] wts = new float[cumPos.length];
         if (nClusters > 0) {
-            Arrays.fill(wts, 0, refHapSegs.clusterStart(0), Float.NaN);
+            Arrays.fill(wts, 0, refHapSegs.segStart(1), Float.NaN);
         }
-        for (int j = 0, jj = (nClusters - 1); j < jj; ++j) {
-            int start = refHapSegs.clusterStart(j);
-            int end = refHapSegs.clusterEnd(j);
-            int nextStart = refHapSegs.clusterStart(j+1);
+        for (int j=1; j<nClusters; ++j) {
+            int start = refHapSegs.segStart(j);
+            int end = refHapSegs.segEnd(j-1);
+            int nextStart = refHapSegs.segStart(j+1);
             double nextStartPos = cumPos[nextStart];
             double totalLength = nextStartPos - cumPos[end - 1];
             Arrays.fill(wts, start, end, 1f);
@@ -190,7 +184,7 @@ public class ImputationData {
                 wts[m] = (float) ( (nextStartPos - cumPos[m]) / totalLength );
             }
         }
-        Arrays.fill(wts, refHapSegs.clusterStart(nClusters - 1), nMarkers, Float.NaN);
+        Arrays.fill(wts, refHapSegs.segStart(nClusters), nMarkers, Float.NaN);
         return wts;
     }
 
@@ -208,21 +202,26 @@ public class ImputationData {
     }
 
     private static RefHapSegs refHapSegs(SampleHapPairs refHapPairs,
-            int[] gtStart, int[] gtEnd, int[] gtIndices, int nThreads) {
-        assert gtStart.length == gtEnd.length;
-        int[] clusterStart = new int[gtStart.length];
-        int[] clusterEnd = new int[gtEnd.length];
-        for (int j=0; j<clusterStart.length; ++j) {
-            clusterStart[j] = gtIndices[gtStart[j]];
-            if (j < clusterStart.length - 1) {
-                clusterEnd[j] = gtIndices[gtEnd[j]];
-            }
-            else {
-                clusterEnd[j] = gtIndices[gtEnd[j] - 1] + 1;
-            }
+            int[] targClustEnd, int[] targToRef) {
+        int n = targClustEnd.length;
+        int[] refClusterStart = new int[n+1];
+        int[] refClusterEnd = new int[n+1];
+
+        refClusterStart[0] = 0;
+        refClusterEnd[0] = targToRef[targClustEnd[0] - 1] + 1;
+
+        refClusterStart[1] = targToRef[0];
+        refClusterEnd[1] = targToRef[targClustEnd[1] - 1] + 1;
+
+        for (int j=2; j<n; ++j) {
+            refClusterStart[j] = targToRef[targClustEnd[j-2]];
+            refClusterEnd[j] = targToRef[targClustEnd[j] - 1] + 1;
         }
-        return new RefHapSegs(refHapPairs, clusterStart, clusterEnd,
-                nThreads);
+
+        refClusterStart[n] = targToRef[targClustEnd[n-2]];
+        refClusterEnd[n] = refHapPairs.nMarkers();
+
+        return new RefHapSegs(refHapPairs, refClusterStart, refClusterEnd);
     }
 
     /**
diff --git a/sample/LSHapBaum.java b/sample/LSHapBaum.java
index 2454cb8..09dea47 100644
--- a/sample/LSHapBaum.java
+++ b/sample/LSHapBaum.java
@@ -209,64 +209,64 @@ public class LSHapBaum {
     }
 
     private void setAlleleProbs(float[] alleleProbs) {
-        int nClusters = refHapSegs.nClusters();
         setFirstAlleleProbs(alleleProbs);
-        for (int cluster=1; cluster < nClusters; ++cluster) {
-            setAlleleProbs(alleleProbs, cluster);
+        int nSegsM1 = refHapSegs.nSegs() - 1;
+        for (int j=1; j<nSegsM1; ++j) {
+            setAlleleProbs(alleleProbs, j);
         }
         setLastAlleleProbs(alleleProbs);
     }
 
     private void setFirstAlleleProbs(float[] alleleProbs) {
         int segment = 0;
-        int refMarker = refHapSegs.clusterStart(segment);
         int nSeq = refHapSegs.nSeq(segment);
+        int endRefMarker = refHapSegs.segStart(segment + 1);
         float threshold = threshold(nSeq);
-        for (int h=0; h<nSeq; ++h) {
-            if (bwdHapProbs[segment][h] >= threshold) {
-                for (int m=0; m<refMarker; ++m) {
+        for (int seq=0; seq<nSeq; ++seq) {
+            if (bwdHapProbs[segment][seq] >= threshold) {
+                for (int m=0; m<endRefMarker; ++m) {
                     int start = refMarkers.sumAlleles(m);
-                    int allele = refHapSegs.allele(segment, m, h);
-                    alleleProbs[start + allele] += bwdHapProbs[segment][h];
+                    int allele = refHapSegs.allele(segment, m, seq);
+                    alleleProbs[start + allele] += bwdHapProbs[segment][seq];
                 }
             }
         }
     }
 
-    private void setAlleleProbs(float[] alleleProbs, int cluster) {
-        assert cluster > 0;
-        int startRefMarker = refHapSegs.clusterStart(cluster-1);
-        int midRefMarker = refHapSegs.clusterEnd(cluster - 1);
-        int endRefMarker = refHapSegs.clusterStart(cluster);
-        int nSeq = refHapSegs.nSeq(cluster);
+    private void setAlleleProbs(float[] alleleProbs, int segment) {
+        assert segment > 0;
+        int clustStart = refHapSegs.segStart(segment);
+        int clustEnd = refHapSegs.segEnd(segment - 1);
+        int nextClustStart = refHapSegs.segStart(segment + 1);
+        int nSeq = refHapSegs.nSeq(segment);
         float threshold = threshold(nSeq);
         for (int seq=0; seq<nSeq; ++seq) {
-            boolean useFwd = fwdHapProbs[cluster-1][seq] >= threshold;
-            boolean useBwd = bwdHapProbs[cluster][seq] >= threshold;
+            boolean useFwd = fwdHapProbs[segment-1][seq] >= threshold;
+            boolean useBwd = bwdHapProbs[segment][seq] >= threshold;
             if (useFwd) {
-                for (int m=startRefMarker; m<midRefMarker; ++m) {
+                for (int m=clustStart; m<clustEnd; ++m) {
                     int start = refMarkers.sumAlleles(m);
-                    int allele = refHapSegs.allele(cluster, m - startRefMarker, seq);
-                    alleleProbs[start + allele] += fwdHapProbs[cluster-1][seq];
+                    int allele = refHapSegs.allele(segment, m - clustStart, seq);
+                    alleleProbs[start + allele] += fwdHapProbs[segment-1][seq];
                 }
             }
             if (useFwd || useBwd) {
-                for (int m=midRefMarker; m<endRefMarker; ++m) {
+                for (int m=clustEnd; m<nextClustStart; ++m) {
                     int start = refMarkers.sumAlleles(m);
-                    int allele = refHapSegs.allele(cluster, m - startRefMarker, seq);
+                    int allele = refHapSegs.allele(segment, m - clustStart, seq);
                     double wt = impData.weight(m);
-                    alleleProbs[start + allele] += wt*fwdHapProbs[cluster-1][seq];
-                    alleleProbs[start + allele] += (1-wt)*bwdHapProbs[cluster][seq];
+                    alleleProbs[start + allele] += wt*fwdHapProbs[segment-1][seq];
+                    alleleProbs[start + allele] += (1-wt)*bwdHapProbs[segment][seq];
                 }
             }
         }
     }
 
     private void setLastAlleleProbs(float[] alleleProbs) {
-        int segment = refHapSegs.nClusters();
+        int segment = refHapSegs.nSegs() - 1;
         int cluster = segment - 1;
-        int refMarkerStart = refHapSegs.clusterStart(cluster);
-        int refMarkerEnd = refHapSegs.refHapPairs().nMarkers();
+        int refMarkerStart = refHapSegs.segStart(segment);
+        int refMarkerEnd = refHapSegs.segEnd(segment);
         int nSeq = refHapSegs.nSeq(segment);
         float threshold = threshold(nSeq);
         for (int seq=0; seq<nSeq; ++seq) {
diff --git a/sample/RefHapSegs.java b/sample/RefHapSegs.java
index 3c71c1e..1ca13b6 100644
--- a/sample/RefHapSegs.java
+++ b/sample/RefHapSegs.java
@@ -18,9 +18,7 @@
  */
 package sample;
 
-import blbutil.IntPair;
 import haplotype.SampleHapPairs;
-import java.util.function.Function;
 import java.util.stream.IntStream;
 
 /**
@@ -34,52 +32,38 @@ import java.util.stream.IntStream;
  */
 public class RefHapSegs {
 
-    private final int[] clusterStart;
-    private final int[] clusterEnd;
+    private final int[] segStart;
+    private final int[] segEnd;
     private final SampleHapPairs refHapPairs;
     private final RefHapSeg[] refHapSegs;
 
     /**
      * Constructs a new {@code RefHapSegs} instance from the specified data.
      * @param refHapPairs the reference haplotype pairs
-     * @param clusterStart an array whose {@code j}-th element is the
+     * @param segStart an array whose {@code j}-th element is the
      * starting reference marker index (inclusive) for the {@code j}-th
-     * marker cluster
-     * @param clusterEnd an array whose {@code j}-th element is the
+     * segment of markers
+     * @param segEnd an array whose {@code j}-th element is the
      * ending reference marker index (exclusive) for the {@code j}-th
-     * marker cluster
-     * @param nThreads the number of threads to use during object construction
+     * segment of markers
      * @throws IllegalArgumentException if
-     * {@code clusterStart.length != clusterEnd.length}
+     * {@code segStart.length != segEnd.length}
      * @throws IllegalArgumentException if
-     * {@code clusterStart.length > 0 && clusterStart[0] < 0}
-     * @throws IllegalArgumentException if
-     * {@code clusterEnd.length > 0 && clusterEnd[clusterEnd.length - 1] > nMarkers}
-     * @throws IllegalArgumentException if
-     * {@code clusterStart[j] >= clusterEnd[j]} for some {@code j} satisfying
-     * {@code 0 <= j && j < clusterStart.length}
-     * @throws IllegalArgumentException if
-     * {@code clusterStart[j] < clusterEnd[j-1]} for some {@code j} satisfying
-     * {@code 1 <= j && j < clusterStart.length}
-     * @throws IllegalArgumentException if {@code nThreads < 0}
+     * {@code (segStart[j] < 0 || segStart[j] >= segEnd[j]
+     * || segEnd[j] >= refHapPairs.nMarkers())} for some {@code j} satisfying
+     * {@code 0 <= j && j < segStart.length}
      * @throws NullPointerException if
-     * {@code refHapPairs == null || clusterStart == null || clusterEnd == null}
+     * {@code refHapPairs == null || segStart == null || segEnd == null}
      */
-    public RefHapSegs(SampleHapPairs refHapPairs, int[] clusterStart,
-            int[] clusterEnd, int nThreads) {
-        if (nThreads <=0) {
-            throw new IllegalArgumentException(String.valueOf(nThreads));
-        }
+    public RefHapSegs(SampleHapPairs refHapPairs, int[] segStart, int[] segEnd) {
         int nMarkers = refHapPairs.nMarkers();
-        checkClusters(clusterStart, clusterEnd, nMarkers);
-        this.clusterStart = clusterStart.clone();
-        this.clusterEnd = clusterEnd.clone();
+        checkClusters(segStart, segEnd, nMarkers);
+        this.segStart = segStart.clone();
+        this.segEnd = segEnd.clone();
         this.refHapPairs = refHapPairs;
-        this.refHapSegs = IntStream.rangeClosed(0, this.clusterStart.length)
+        this.refHapSegs = IntStream.range(0, this.segStart.length)
                 .parallel()
-                .mapToObj(j -> intPair(j, this.clusterStart, this.clusterEnd,
-                        nMarkers))
-                .map(ip -> new RefHapSeg(refHapPairs, ip.first(), ip.second()))
+                .mapToObj(j -> new RefHapSeg(refHapPairs, segStart[j], segEnd[j]))
                 .toArray(RefHapSeg[]::new);
     }
 
@@ -87,29 +71,13 @@ public class RefHapSegs {
         if (starts.length != ends.length) {
             throw new IllegalArgumentException("inconsistent data");
         }
-        if (starts.length > 0 && starts[0] < 0) {
-            throw new IllegalArgumentException("inconsistent data");
-        }
-        if (ends.length > 0 && ends[ends.length - 1] > nMarkers) {
-            throw new IllegalArgumentException("inconsistent data");
-        }
         for (int j=0; j<starts.length; ++j) {
-            if (starts[j] >= ends[j]) {
-                throw new IllegalArgumentException("inconsistent data");
-            }
-            if (j>0 && ends[j-1] > starts[j]) {
+            if (starts[j] < 0 || starts[j] >= ends[j] || ends[j] > nMarkers) {
                 throw new IllegalArgumentException("inconsistent data");
             }
         }
     }
 
-    private static IntPair intPair(int index, int[] starts, int[] ends,
-            int nMarkers) {
-        int start = (index == 0) ? 0 :  starts[index - 1];
-        int end = (index == ends.length) ? nMarkers : ends[index];
-        return new IntPair(start, end);
-    }
-
     /**
      * Returns the reference haplotype pairs.
      * @return the reference haplotype pairs
@@ -186,36 +154,36 @@ public class RefHapSegs {
     }
 
     /**
+     * Returns the number of marker segments..
+     * @return the number of marker segments
+     */
+    public int nSegs() {
+        return segStart.length;
+    }
+
+    /**
      * Returns the index of the first marker (inclusive) in the specified
-     * marker cluster.
-     * @param cluster an index of a marker cluster
+     * marker segment.
+     * @param segment an index of a marker segment
      * @return the index of the first marker (inclusive) in the specified
-     * marker cluster
+     * marker segment
      * @throws IndexOutOfBoundsException if
-     * {@code cluster < 0 || cluster >= this.nClusters}
+     * {@code segment < 0 || segment >= this.nSegs()}
      */
-    public int clusterStart(int cluster) {
-        return clusterStart[cluster];
+    public int segStart(int segment) {
+        return segStart[segment];
     }
 
     /**
      * Returns the index of the last marker (exclusive) in the specified
-     * marker cluster.
-     * @param cluster an index of a marker cluster
+     * marker segment.
+     * @param segment an index of a marker segment
      * @return the index of the last marker (exclusive) in the specified
-     * marker cluster
+     * marker segment
      * @throws IndexOutOfBoundsException if
-     * {@code cluster < 0 || cluster >= this.nClusters()}
-     */
-    public int clusterEnd(int cluster) {
-        return clusterEnd[cluster];
-    }
-
-    /**
-     * Returns the number of marker clusters.
-     * @return the number of marker clusters
+     * {@code segment 0  || segment >= this.nSegs()}
      */
-    public int nClusters() {
-        return clusterStart.length;
+    public int segEnd(int segment) {
+        return segEnd[segment];
     }
 }
diff --git a/vcf/AllData.java b/vcf/AllData.java
index bf190b2..b5f5129 100644
--- a/vcf/AllData.java
+++ b/vcf/AllData.java
@@ -146,9 +146,9 @@ public class AllData implements Data {
     }
 
     @Override
-    public void advanceWindow(int requestedOverlap, int windowSize) {
+    public void advanceWindow(int overlap, int windowSize) {
         Samples refSamples = refWindow.samples();
-        refData = refWindow.advanceWindow(requestedOverlap, windowSize);
+        refData = refWindow.advanceWindow(overlap, windowSize);
         refEmissions = new RefGL(refSamples, refData);
         refSampleHapPairs = new RefHapPairs(refEmissions.markers(), refSamples, refData);
         targetData = targetWindow.advanceWindow(refEmissions.markers());
diff --git a/vcf/Data.java b/vcf/Data.java
index 566103e..03e3299 100644
--- a/vcf/Data.java
+++ b/vcf/Data.java
@@ -49,16 +49,12 @@ public interface Data extends Closeable {
 
     /**
      * Advances the sliding window of VCF records, and returns the advanced
-     * window.  The size of the advanced window and the number of markers
-     * of overlap between the marker window immediately before method
-     * invocation and the marker window immediately after method invocation
-     * may differ from the requested values.  If the advanced window size or
-     * overlap is less than the requested value, the actual value will be
-     * as large as possible. If {@code this.lastWindowOnChrom() == true}
-     * before method invocation, then there will be no overlap between the
-     * windows.
+     * window.  The size of the advanced window may differ from the requested
+     * value.  If the advanced window size
+     * is less than the requested value, the actual value will be
+     * as large as possible.
      *
-     * @param overlap the requested number of markers of overlap
+     * @param overlap the number of markers of overlap
      * @param windowSize the requested number of the markers in the window
      * immediately after the method returns
      *
@@ -66,6 +62,10 @@ public interface Data extends Closeable {
      * is detected
      * @throws IllegalArgumentException if
      * {@code overlap < 0 || overlap >= windowSize}
+     * @throws IllegalArgumentException if
+     * {@code overlap > this.nMarkers()} at the time of method invocation
+     * @throws IllegalArgumentException if
+     * {@code overlap > 0 && this.lastWindowOnChromosome() == true}
      * @throws IllegalStateException if
      * {@code this.canAdvanceWindow() == false}
      */
diff --git a/vcf/R2Estimator.java b/vcf/R2Estimator.java
new file mode 100644
index 0000000..e52459c
--- /dev/null
+++ b/vcf/R2Estimator.java
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2016 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+/**
+ * <p>Class {@code R2Estimator} estimates the correlation between the
+ * estimated allele dose and true allele dose for a set of genotypes.
+ * </p>
+ * <p>Instances of class {@code R2Estimator} are not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class R2Estimator {
+
+    private static final double MIN_R2_DEN = 1e-8;
+
+    private int nGenotypes = 0;
+    private double sumCall = 0;
+    private double sumSquareCall = 0;
+    private double sumExpected = 0;
+    private double sumExpectedSquare = 0;
+    private double sumSquareExpected= 0;
+    private double sumCallExpected = 0;
+
+    /**
+     * Constructs a new {@code R2Estimator} instance.
+     */
+    public R2Estimator() {
+    }
+
+    /**
+     * Clears all genotype data and sets the number of genotype with
+     * allele dose data to 0.
+     */
+    public void clear() {
+        this.nGenotypes = 0;
+        this.sumCall = 0.0;
+        this.sumSquareCall = 0.0;
+        this.sumExpected = 0.0;
+        this.sumExpectedSquare = 0.0;
+        this.sumSquareExpected = 0.0;
+        this.sumCallExpected = 0.0;
+    }
+
+    /**
+     * Returns the current number of genotypes with allele dose data.
+     * @return the current number of genotypes with allele dose data
+     */
+    public int nGenotypes() {
+        return nGenotypes;
+    }
+
+    /**
+     * Adds the specified allele dose probabilities for a genotype to the stored
+     * allele dose data.
+     * @param doseProbs an array of length 3 whose {@code j}-th element
+     * is the probability that the genotype contains {@code j} non-reference
+     * alleles.
+     * @throws IllegalArgumentException if {@code doseProbs.length != 3}
+     * @throws IllegalArgumentException if any element of {@code doseProbs} is
+     * less than 0
+     * @throws IllegalArgumentException if the sum of the elements in
+     * {@code doseProbs} differs from 1.0 by more than {@code 1e-5}
+     * @throws NullPointerException if {@code doseProbs == null}
+     */
+    public void addSampleData(double[] doseProbs) {
+        if (doseProbs.length != 3) {
+            throw new IllegalArgumentException(String.valueOf(doseProbs));
+        }
+        ++nGenotypes;
+        int call = checkDataAndReturnMaxIndex(doseProbs);
+        double exp = (doseProbs[1] + 2*doseProbs[2]);
+        double expSquare = (doseProbs[1] + 4*doseProbs[2]);
+        sumCall += call;
+        sumSquareCall += call*call;
+        sumExpected += exp;
+        sumExpectedSquare += expSquare;
+        sumSquareExpected += (exp*exp);
+        sumCallExpected += (call*exp);
+    }
+
+    private static int checkDataAndReturnMaxIndex(double[] probs) {
+        int maxIndex = 0;
+        double sum = 0.0;
+        for (int j=0; j<probs.length; ++j) {
+            if (probs[j] < 0) {
+                throw new IllegalArgumentException(String.valueOf(probs[j]));
+            }
+            sum += probs[j];
+            if (probs[j] > probs[maxIndex]) {
+                maxIndex = j;
+            }
+        }
+        if (Math.abs(sum - 1.0) > 1e-5) {
+            throw new IllegalArgumentException(String.valueOf(sum));
+        }
+        return maxIndex;
+    }
+
+    /**
+     * Returns the estimated squared correlation between the most probable
+     * ALT allele dose and the true ALT allele dose for the current
+     * genotype data.
+     * Returns 0 if the marker is monomorphic or if the most probable ALT
+     * allele dose is monomorphic.
+     *
+     * @return the estimated squared correlation between the most likely
+     * allele dose and the true allele dose
+     */
+    public double allelicR2() {
+        double f = 1.0/nGenotypes;
+        double cov = sumCallExpected - (sumCall * sumExpected * f);
+        double varBest = sumSquareCall - (sumCall * sumCall * f);
+        double varExp = sumExpectedSquare - (sumExpected * sumExpected * f);
+        double den = varBest*varExp;
+        return (den < MIN_R2_DEN) ? 0.0f : (cov*cov/den);
+    }
+
+    /**
+     * Returns the estimated squared correlation between the expected
+     * ALT allele dose and the true ALT allele dose for the current
+     * genotype data. Returns 0 if the marker is monomorphic.
+     *
+     * @return the estimated squared correlation between the expected ALT
+     * allele dose and the true ALT allele dose
+     */
+    public double doseR2() {
+        double f = 1.0/nGenotypes;
+        double num = sumSquareExpected - (sumExpected * sumExpected * f);
+        double den = sumExpectedSquare - (sumExpected * sumExpected * f);
+        if (num < 0.0) {
+            num = 0.0;
+        }
+        return (den < MIN_R2_DEN) ? 0.0f : (num / den);
+    }
+}
diff --git a/vcf/VcfRecBuilder.java b/vcf/VcfRecBuilder.java
new file mode 100644
index 0000000..e2356cc
--- /dev/null
+++ b/vcf/VcfRecBuilder.java
@@ -0,0 +1,406 @@
+/*
+ * Copyright (C) 2016 Brian L. Browning
+ *
+ * This file is part of Beagle
+ *
+ * Beagle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Beagle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+package vcf;
+
+import blbutil.Const;
+import java.io.PrintWriter;
+import java.math.BigDecimal;
+import java.math.MathContext;
+import java.text.DecimalFormat;
+import java.util.Arrays;
+
+/**
+ * <p>Class {@code VcfRecBuilder} contains methods for constructing
+ * and printing a VCF record in VCF 4.2 format.  The FORMAT field data
+ * for each sample is added sequentially to the record via the
+ * {@code addSampleData()} method.
+ *
+ * </p>
+ * <p>Instances of class {@code VcfRecBuilder} are not thread-safe.
+ * </p>
+ *
+ * @author Brian L. Browning {@code <browning at uw.edu>}
+ */
+public final class VcfRecBuilder {
+
+    /**
+     * The default initial size for the string buffer, which is 50
+     * characters.
+     */
+    public static final int DEFAULT_INIT_SIZE = 50;
+
+    private static final MathContext mc2 = new MathContext(2);
+    private static final BigDecimal ONE = new BigDecimal(1.0);
+    private static final String[] dsProbs = dsProbs();
+    private static final String[] r2Probs = r2Probs(dsProbs);
+
+    private final StringBuilder sb;
+    private final R2Estimator r2Est;
+    private final double[] gt3Probs;
+
+    private Marker marker;
+    private boolean printDS;
+    private boolean printGP;
+    private int nAlleles;
+    private int nGenotypes;
+    private int allele1;
+    private int allele2;
+    private double[] gtProbs;
+    private double[] dose;
+    private double[] cumAlleleProbs;
+
+    private static String[] dsProbs() {
+        DecimalFormat df = new DecimalFormat("#.##");
+        String[] probs = new String[201];
+        for (int j=0;j<probs.length; ++j) {
+            probs[j] = df.format(j/100.0);
+        }
+        return probs;
+    }
+
+    private static String[] r2Probs(String[] dsProbs) {
+        DecimalFormat df = new DecimalFormat("0.00");
+        String[] probs = Arrays.copyOf(dsProbs, 101);
+        for (int j=0;j<probs.length; ++j) {
+            if (probs[j].length()!=4) {
+                probs[j] = df.format(j/100.0);
+            }
+        }
+        return probs;
+    }
+
+    /**
+     * Constructs a new {@code VcfRecBuilder} instance with initial buffer
+     * size equal to {@code VcfRecBuilder.DEFAULT_INIT_SIZE}.
+     */
+    public VcfRecBuilder() {
+        this(DEFAULT_INIT_SIZE);
+    }
+
+    /**
+     * Constructs a new {@code VcfRecBuilder} instance with the specified
+     * initial buffer size.
+     *
+     * @param initSize the initial buffer size
+     * @throws NegativeArraySizeException if {@code initCapacity < 0}
+     */
+    public VcfRecBuilder(int initSize) {
+        this.sb = new StringBuilder(initSize);
+        this.r2Est = new R2Estimator();
+        this.gt3Probs = new double[3];
+    }
+
+    /**
+     * Clears existing data, and sets the current marker to the specified
+     * marker.  If the FORMAT field contains a DS or GP subfield,
+     * the INFO field will include the AR2 (allele r2), DR2 (dose r2), and
+     * AF (ALT allele frequency) subfields.
+     * @param marker the marker to which data will be added
+     * @param printDS {@code true} if the FORMAT field in the VCF record for
+     * this marker will include a DS subfield, and {@code false} otherwise
+     * @param printGP {@code true} if the FORMAT field in the VCF record for
+     * this marker will include a GP subfield, and {@code false} otherwise
+     * @throws NullPointerException if {@code marker == null}
+     */
+    public void reset(Marker marker, boolean printDS, boolean printGP) {
+        this.sb.setLength(0);
+        this.r2Est.clear();
+        this.marker = marker;
+        this.printDS = printDS;
+        this.printGP = printGP;
+        this.allele1 = -1;
+        this.allele2 = -1;
+        this.nAlleles = marker.nAlleles();
+        this.nGenotypes = marker.nGenotypes();
+        this.gtProbs = new double[marker.nGenotypes()];
+        this.cumAlleleProbs = new double[marker.nAlleles()];
+        this.dose = new double[marker.nAlleles()];
+    }
+
+    /**
+     * Returns the current marker.  Returns {@code null} if
+     * {@code this.reset()} has not been previously invoked.
+     * @return the current marker.
+     */
+    public Marker marker() {
+        return marker;
+    }
+
+    /**
+     * Returns {@code true} if the FORMAT field in the VCF record for
+     * this marker includes a DS subfield, and {@code false} otherwise
+     * @return {@code true} if the FORMAT field in the VCF record for
+     * this marker includes a DS subfield
+     */
+    public boolean printDS() {
+        return printDS;
+    }
+
+    /**
+     * Returns {@code true} if the FORMAT field in the VCF record for
+     * this marker includes a GP subfield, and {@code false} otherwise
+     * @return {@code true} if the FORMAT field in the VCF record for
+     * this marker includes a GP subfield
+     */
+    public boolean printGP() {
+        return printGP;
+    }
+
+    /**
+     * Adds the FORMAT field for a sample to the VCF record for the current
+     * marker.
+     * @param gtProbs the posterior genotype probabilities
+     * @throws IllegalArgumentException if
+     * {@code gtProbs.length != this.marker().nGenotypes()}
+     * @throws IllegalStateException if {@code this.marker() == null}
+     * @throws NullPointerException if {@code gtProbs == null}
+     */
+    public void addSampleData(double[] gtProbs) {
+        initializeFieldsAndCheckParameters(gtProbs);
+        this.gtProbs[0] = gtProbs[0];
+        gt3Probs[0] = gtProbs[0];
+        double maxProb = gtProbs[0];
+        double sum = gtProbs[0];
+        int gt = 1;
+        for (int a2=1; a2<nAlleles; ++a2) {
+            for (int a1=0; a1<=a2; ++a1) {
+                double gtProb = gtProbs[gt];
+                if (gtProb < 0) {
+                    throw new IllegalArgumentException(String.valueOf(gtProb));
+                }
+                this.gtProbs[gt] = gtProb;
+                sum += gtProb;
+                dose[a1] += gtProb;
+                dose[a2] += gtProb;
+                gt3Probs[(a1==0) ? 1 : 2] += gtProb;
+                if (gtProb > maxProb) {
+                    allele1 = a1;
+                    allele2 = a2;
+                    maxProb = gtProb;
+                }
+                ++gt;
+            }
+        }
+        if (Math.abs(sum - 1.0) > 1e-6) {
+            throw new IllegalArgumentException(String.valueOf(sum));
+        }
+        addToCumAlleleProbs(dose);
+        r2Est.addSampleData(gt3Probs);
+        appendFormatData(false);
+    }
+
+    private void initializeFieldsAndCheckParameters(double[] gtProbs) {
+        if (marker==null) {
+            throw new IllegalStateException();
+        }
+        if (gtProbs.length != nGenotypes) {
+            throw new IllegalArgumentException(String.valueOf(gtProbs.length));
+        }
+        Arrays.fill(gt3Probs, 0.0);
+        Arrays.fill(dose, 0.0);
+        allele1 = 0;
+        allele2 = 0;
+    }
+
+    /**
+     * Adds the FORMAT field for a sample to the VCF record for the current
+     * marker.
+     * @param alProbs1 the posterior allele probabilities for the individual's
+     * first allele
+     * @param alProbs2 the posterior allele probabilities for the individual's
+     * second allele
+     * @throws IllegalArgumentException if
+     * {@code alProbs1.length != this.marker().nAlleles()}
+     * @throws IllegalArgumentException if
+     * {@code alProbs2.length != this.marker().nAlleles()}
+     * @throws IllegalStateException if {@code this.marker() == null}
+     * @throws NullPointerException if
+     * {@code alProbs1 == null || alProbs2 == null}
+     */
+    public void addSampleData(double[] alProbs1, double[] alProbs2) {
+        if (marker==null) {
+            throw new IllegalStateException();
+        }
+        allele1 = maxIndexAndParameterCheck(alProbs1);
+        allele2 = maxIndexAndParameterCheck(alProbs2);
+        Arrays.fill(gt3Probs, 0.0);
+        dose[0] = alProbs1[0] + alProbs2[0];
+        gtProbs[0] = alProbs1[0] * alProbs2[0];
+        gt3Probs[0] = gtProbs[0];
+        int gt = 1;
+        for (int a2=1; a2<alProbs1.length; ++a2) {
+            dose[a2] = alProbs1[a2] + alProbs2[a2];
+            for (int a1=0; a1<=a2; ++a1) {
+                double gtProb = alProbs1[a1]*alProbs2[a2];
+                if (a1!=a2) {
+                    gtProb += alProbs1[a2]*alProbs2[a1];
+                }
+                gtProbs[gt++] = gtProb;
+                gt3Probs[(a1==0) ? 1 : 2] += gtProb;
+            }
+        }
+        addToCumAlleleProbs(dose);
+        r2Est.addSampleData(gt3Probs);
+        boolean isPhased = true;
+        appendFormatData(isPhased);
+    }
+
+    private int maxIndexAndParameterCheck(double[] da) {
+        if (da.length != nAlleles) {
+            throw new IllegalArgumentException(String.valueOf(da.length));
+        }
+        int maxIndex = 0;
+        double sum = 0;
+        for (int j=0; j<da.length; ++j) {
+            if (da[j] < 0) {
+                throw new IllegalArgumentException(String.valueOf(da[j]));
+            }
+            sum += da[j];
+            if (da[j] > da[maxIndex]) {
+                maxIndex = j;
+            }
+        }
+        if (Math.abs(sum - 1.0) > 1e-6) {
+            throw new IllegalArgumentException(String.valueOf(sum));
+        }
+        return maxIndex;
+    }
+
+    private void addToCumAlleleProbs(double[] dose) {
+        for (int j=0; j<dose.length; ++j) {
+            cumAlleleProbs[j] += dose[j];
+        }
+    }
+
+    private void appendFormatData(boolean isPhased) {
+        sb.append(Const.tab);
+        sb.append(allele1);
+        sb.append(isPhased ? Const.phasedSep : Const.unphasedSep);
+        sb.append(allele2);
+        if (printDS) {
+            for (int j=1; j<nAlleles; ++j) {
+                sb.append( (j==1) ? Const.colon : Const.comma );
+                sb.append(dsProbs[(int) Math.rint(100*dose[j])]);
+            }
+        }
+        if (printGP) {
+            for (int j=0; j<gtProbs.length; ++j) {
+                sb.append(j==0? Const.colon : Const.comma);
+                sb.append(dsProbs[(int) Math.rint(100*gtProbs[j])]);
+            }
+        }
+    }
+
+    /**
+     * Prints the current VCF record for the current marker to the specified
+     * {@code PrintWriter}.  If the FORMAT field contains a DS or GP subfield,
+     * the INFO field will include the AR2 (allele r2), DR2 (dose r2), and
+     * AF (ALT allele frequency) subfields.  Invocation of this method has
+     * no effect if {@code this.reset()} has not previously been invoked.
+     * @param out the {@code PrintWriter} to which the VCF record will be
+     * printed
+     * @param isImputed {@code true} if the printed VCF record will
+     * have an IMP flag in the INFO field and {@code false} otherwise
+     * @throws NullPointerException if {@code out == null}
+     */
+    public void writeRec(PrintWriter out, boolean isImputed) {
+        if (marker!=null) {
+            printMarker(marker, out);
+            out.print(Const.tab);
+            out.print(Const.MISSING_DATA_CHAR);     // QUAL
+            out.print(Const.tab);
+            out.print("PASS");                      // FILTER
+            out.print(Const.tab);
+            printInfo(out, isImputed);              // INFO
+            out.print(Const.tab);
+            out.print(format(printDS, printGP));    // FORMAT
+            out.println(sb);
+        }
+    }
+
+    private void printInfo(PrintWriter out, boolean isImputed) {
+        if (printDS || printGP) {
+            out.print("AR2=");
+            out.print(r2Probs[(int) Math.rint(100*r2Est.allelicR2())]);
+            out.print(";DR2=");
+            out.print(r2Probs[(int) Math.rint(100*r2Est.doseR2())]);
+            for (int j=1; j<nAlleles; ++j) {
+                out.print( (j==1) ? ";AF=" : Const.comma);
+                out.print(formatProb(cumAlleleProbs[j]/(2*r2Est.nGenotypes())));
+            }
+            if (isImputed) {
+                out.print(";IMP");
+            }
+        }
+        else {
+            out.print(Const.MISSING_DATA_CHAR);
+        }
+    }
+
+    private static String format(boolean printDS, boolean printGP) {
+        if (printDS) {
+            return printGP ? "GT:DS:GP" : "GT:DS";
+        }
+        else {
+            return "GT";
+        }
+    }
+
+    private static void printMarker(Marker marker, PrintWriter out) {
+        out.print(marker.chrom());
+        out.print(Const.tab);
+        out.print(marker.pos());
+        int nIds = marker.nIds();
+        if (nIds==0) {
+            out.print(Const.tab);
+            out.print(Const.MISSING_DATA_CHAR);
+        }
+        else {
+            for (int j=0; j<nIds; ++j) {
+                out.print(j==0 ? Const.tab : Const.semicolon);
+                out.print(marker.id(j));
+            }
+        }
+        int nAlleles = marker.nAlleles();
+        if (nAlleles==1) {
+            out.print(Const.tab);
+            out.print(marker.allele(0));
+            out.print(Const.tab);
+            out.print(Const.MISSING_DATA_CHAR);
+        }
+        else {
+            for (int j=0; j<nAlleles; ++j) {
+                out.print(j<2 ? Const.tab : Const.comma);
+                out.print(marker.allele(j));
+            }
+        }
+    }
+
+    private static String formatProb(double p) {
+        if (p>=0 && p <= 0.5) {
+            return new BigDecimal(p).round(mc2).toString();
+        }
+        else if (p <= 1.0) {
+            return new BigDecimal(p-1.0).round(mc2).add(ONE).toString();
+        }
+        else {
+            throw new IllegalArgumentException(String.valueOf(p));
+        }
+    }
+}
diff --git a/vcf/VcfWindow.java b/vcf/VcfWindow.java
index 8fc44f5..7f61c7e 100644
--- a/vcf/VcfWindow.java
+++ b/vcf/VcfWindow.java
@@ -95,7 +95,7 @@ public class VcfWindow implements Closeable {
      * there will be no overlap between the advanced window and the previous
      * window.
      *
-     * @param overlap the requested number of markers of overlap
+     * @param overlap the number of markers of overlap
      * @param windowSize the requested number of the markers in the window
      * immediately after the method returns
      * @return the advanced window of VCF records
@@ -104,6 +104,10 @@ public class VcfWindow implements Closeable {
      * a VCF record
      * @throws IllegalArgumentException if
      * {@code overlap < 0 || overlap >= windowSize}
+     * @throws IllegalArgumentException if {@code overlap > this.size()}
+     * at the time of method invocation
+     * @throws IllegalArgumentException if
+     * {@code overlap > 0 && this.lastWindowOnChromosome() == true}
      * @throws IllegalStateException if
      * {@code this.canAdvanceWindow() == false}
      */
@@ -112,8 +116,7 @@ public class VcfWindow implements Closeable {
             throw new IllegalStateException("canAdvanceWindow()==false");
         }
         checkParameters(overlap, windowSize);
-        overlap = getActualOverlap(overlap);
-        List<VcfEmission> newWindow = new ArrayList<>(windowSize);
+        List<VcfEmission> newWindow = new ArrayList<>(overlap + 1000);
 
         newWindow.addAll(window.subList(window.size() - overlap, window.size()));
         int currentChromIndex = currentChromIndex(newWindow);
@@ -136,86 +139,17 @@ public class VcfWindow implements Closeable {
         return window.toArray(new VcfEmission[0]);
     }
 
-    /**
-     * Advances the sliding window of VCF records, and returns the advanced
-     * window as a {@code VcfEmission[]} object.  The size of the advanced
-     * window and the number of markers of overlap between the marker window
-     * immediately before method invocation and the marker window immediately
-     * after method invocation may differ from the requested values.  If the
-     * distance the window is advanced or the overlap is less than the requested
-     * value, the actual distance or overlap will be as large as possible. If
-     * {@code this.lastWindowOnChrom() == true}
-     * before method invocation, then there will be no overlap between the
-     * advanced window and the previous window
-     *
-     * @param overlap the requested number of markers of overlap
-     * @param cM the requested distance in cM to advance the window
-     * @param map the genetic map
-     * @return the advanced window of VCF records
-     *
-     * @throws IllegalArgumentException if a format error is detected in
-     * a VCF record
-     * @throws IllegalArgumentException if {@code overlap < 0 || cM <= 0}
-     * @throws IllegalStateException if
-     * {@code this.canAdvanceWindow() == false}
-     */
-    public VcfEmission[] advanceWindow(int overlap, double cM, GeneticMap map) {
-        if (canAdvanceWindow()==false) {
-            throw new IllegalStateException("canAdvanceWindow()==false");
-        }
-        if (overlap < 0) {
-            throw new IllegalArgumentException(String.valueOf(overlap));
-        }
-        if (cM < 0) {
-            throw new IllegalArgumentException(String.valueOf(cM));
-        }
-
-        overlap = getActualOverlap(overlap);
-        List<VcfEmission> newWindow = new ArrayList<>(overlap + 1000);
-
-        newWindow.addAll(window.subList(window.size() - overlap, window.size()));
-        int currentChromIndex = currentChromIndex(newWindow);
-        double endMapPos = startMapPos(newWindow, map) + cM;
-        while (next != null
-                && next.marker().chromIndex()==currentChromIndex
-                && map.genPos(next.marker()) < endMapPos) {
-            newWindow.add(next);
-            next = it.hasNext() ? it.next() : null;
-        }
-        // add all markers at the same marker position
-        VcfEmission last = newWindow.get(newWindow.size()-1);
-        while (next!=null && samePosition(last, next)) {
-            newWindow.add(next);
-            next = it.hasNext() ? it.next() : null;
-        }
-        this.overlap = overlap;
-        this.window.clear();
-        this.window.addAll(newWindow);
-        this.cumMarkerCnt += (window.size() - overlap);
-        return window.toArray(new VcfEmission[0]);
-    }
-
     private void checkParameters(int overlap, int windowSize) {
         if (overlap < 0 || overlap >= windowSize) {
             String s = "overlap=" + overlap + "windowSize=" + windowSize;
             throw new IllegalArgumentException(s);
         }
-    }
-
-    private int getActualOverlap(int overlap) {
-        if (window.isEmpty() || lastWindowOnChrom()) {
-            return 0;
-        }
-        int n = window.size();
-        if (overlap > n) {
-            overlap = n;
+        if (overlap > window.size()) {
+            throw new IllegalArgumentException(String.valueOf(overlap));
         }
-        while (overlap > 0 && overlap < n
-                && window.get(n - overlap).marker().pos()
-                    == window.get(n - overlap - 1).marker().pos()) {
-            ++overlap;
+        if (overlap > 0 && lastWindowOnChrom()) {
+            throw new IllegalArgumentException(String.valueOf(overlap));
         }
-        return overlap;
     }
 
     private int currentChromIndex(List<VcfEmission> currentWindow) {
@@ -230,19 +164,6 @@ public class VcfWindow implements Closeable {
         }
     }
 
-    private double startMapPos(List<VcfEmission> currentWindow, GeneticMap map) {
-        if (currentWindow.isEmpty()==false) {
-            Marker m = currentWindow.get(currentWindow.size() - 1).marker();
-            return map.genPos(m);
-        }
-        else if (next!=null) {
-            return map.genPos(next.marker());
-        }
-        else {
-            return 0;
-        }
-    }
-
     private boolean samePosition(VcfEmission a, VcfEmission b) {
         return a.marker().chromIndex()==b.marker().chromIndex()
                 && a.marker().pos()==b.marker().pos();
@@ -275,6 +196,14 @@ public class VcfWindow implements Closeable {
     }
 
     /**
+     * Returns the number of VCF records in the current window.
+     * @return the number of VCF records in the current window
+     */
+    public int size() {
+        return window.size();
+    }
+
+    /**
      * Returns the number of VCF records in the overlap between the current
      * window and the previous window.  Returns 0 if the current window
      * is the first window.
diff --git a/vcf/VcfWriter.java b/vcf/VcfWriter.java
index 5c6fe53..31a1639 100644
--- a/vcf/VcfWriter.java
+++ b/vcf/VcfWriter.java
@@ -1,32 +1,11 @@
-/*
- * Copyright (C) 2014 Brian L. Browning
- *
- * This file is part of Beagle
- *
- * Beagle is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Beagle is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
+/*__LICENSE__*/
 package vcf;
 
 import blbutil.Const;
 import java.io.PrintWriter;
-import java.math.BigDecimal;
-import java.math.MathContext;
-import java.text.DecimalFormat;
 import java.text.SimpleDateFormat;
 import java.util.Calendar;
 import main.AlleleProbs;
-import main.ConstrainedAlleleProbs;
 import main.GenotypeValues;
 
 /**
@@ -40,13 +19,6 @@ import main.GenotypeValues;
  */
 public final class VcfWriter {
 
-    private static final String PASS = "PASS";
-    private static final DecimalFormat df2 = new DecimalFormat("#.##");
-    private static final DecimalFormat df3 = new DecimalFormat("#.###");
-    private static final DecimalFormat df2_fixed = new DecimalFormat("0.00");
-    private static final MathContext mc2 = new MathContext(2);
-    private static final BigDecimal ONE = new BigDecimal(1.0);
-
     private static final String fileformat = "##fileformat=VCFv4.2";
 
     private static final String afInfo = "##INFO=<ID=AF,Number=A,Type=Float,"
@@ -62,7 +34,7 @@ public final class VcfWriter {
 
     private static final String gtFormat = "##FORMAT=<ID=GT,Number=1,Type=String,"
             + "Description=\"Genotype\">";
-    private static final String dsFormat = "##FORMAT=<ID=DS,Number=1,Type=Float,"
+    private static final String dsFormat = "##FORMAT=<ID=DS,Number=A,Type=Float,"
             +"Description=\"estimated ALT dose [P(RA) + P(AA)]\">";
     private static final String glFormat = "##FORMAT=<ID=GL,Number=G,Type=Float,"
             + "Description=\"Log10-scaled Genotype Likelihood\">";
@@ -189,152 +161,137 @@ public final class VcfWriter {
         if (start > end) {
             throw new IllegalArgumentException("start=" + start + " end=" + end);
         }
-        for (int marker=start; marker<end; ++marker) {
-            printFixedFields(gv, marker, out);
-            for (int sample=0, n=gv.nSamples(); sample<n; ++sample) {
-                print_GT_DS_GP(gv, marker, sample, out);
-            }
-            out.println();
-        }
-    }
-
-
-    private static void print_GT_DS_GP(GenotypeValues gv, int marker, int sample,
-            PrintWriter out) {
-        int nAlleles = gv.marker(marker).nAlleles();
-        int nGenotypes = gv.marker(marker).nGenotypes();
-        float[] dose = new float[nAlleles];
-        int bestA1 = -1;
-        int bestA2 = -1;
-        int gt = 0;
-        float sum = 0f;
-        float maxGP = 0f;
-        for (int a2=0; a2<nAlleles; ++a2) {
-            for (int a1=0; a1<=a2; ++a1) {
-                float value = gv.value(marker, sample, gt++);
-                if (value > maxGP) {
-                    bestA1 = a1;
-                    bestA2 = a2;
-                    maxGP = value;
+        boolean printDS = true;
+        boolean printGP = true;
+        boolean isImputed = false;
+        VcfRecBuilder vrb = new VcfRecBuilder(12*gv.nSamples());
+        for (int m=start; m<end; ++m) {
+            Marker marker = gv.marker(m);
+            vrb.reset(marker, printDS, printGP);
+            double[] gprobs = new double[marker.nGenotypes()];
+            for (int s=0, n=gv.nSamples(); s<n; ++s) {
+                double sum = 0.0;
+                for (int gt=0; gt<gprobs.length; ++gt) {
+                    gprobs[gt] = gv.value(m, s, gt);
+                    sum += gprobs[gt];
                 }
-                dose[a1] += value;
-                dose[a2] += value;
-                sum += value;
+                for (int gt=0; gt<gprobs.length; ++gt) {
+                    gprobs[gt] /= sum;
+                }
+                vrb.addSampleData(gprobs);
             }
-        }
-        out.print(Const.tab);
-        out.print(bestA1 == -1 ? Const.MISSING_DATA_STRING : bestA1);
-        out.print(Const.unphasedSep);
-        out.print(bestA2 == -1 ? Const.MISSING_DATA_STRING : bestA2);
-        for (int al = 1; al < dose.length; ++al) {
-            out.print( (al==1) ? Const.colon : Const.comma);
-            out.print(df2.format(dose[al]/sum));
-        }
-        for (gt=0; gt<nGenotypes; ++gt) {
-            out.print(gt==0 ? Const.colon : Const.comma);
-            double v = gv.value(marker, sample, gt)/sum;
-            out.print(df2.format(v));
+            vrb.writeRec(out, isImputed);
         }
     }
 
     /**
-     * Writes the specified genotype data as VCF records to the specified
+     * Writes the data in alProbs for markers with index between
+     * {@code start} (inclusive) and {@code end} (exclusive) to the specified
      * {@code PrintWriter}.
-     * @param alProbs the sample haplotype pairs
+     * @param alProbs the estimated haplotype allele probabilities
+     * @param isImputed an array of length {@code alProbs.nMarkers()}
+     * whose {@code j}-th element is {@code true} if the corresponding
+     * marker is imputed, and {@code false} otherwise
      * @param start the starting marker index (inclusive)
      * @param end the ending marker index (exclusive)
-     * @param gprobs {@code true} if the GP field should be printed, and
-     * {@code false} otherwise.
-     * @param out the {@code PrintWriter} to which VCF records will
-     * be written
-     *
+     * @param printDS {@code true} if the DS field should be printed, and
+     * {@code false} otherwise
+     * @param printGP {@code true} if the GP field should be printed, and
+     * {@code false} otherwise
+     * @param out the {@code PrintWriter} to which VCF records will be written
+     * @throws IllegalArgumentException if
+     * {@code isImputed.length != alProbs.nMarkers()}
      * @throws IndexOutOfBoundsException if
      * {@code (start < 0 || start > end || end > alProbs.nMarkers())}
-     * @throws NullPointerException if {@code haps == null || out == null}
+     * @throws NullPointerException if
+     * {@code alProbs == null || isImputed == null || out == null}
      */
-    public static void appendRecords(ConstrainedAlleleProbs alProbs,
-            int start, int end, boolean gprobs, PrintWriter out) {
+    public static void appendRecords(AlleleProbs alProbs, boolean[] isImputed,
+            int start, int end, boolean printDS, boolean printGP,
+            PrintWriter out) {
+        if (isImputed.length != alProbs.nMarkers()) {
+            throw new IllegalArgumentException("inconsistent data");
+        }
         if (start > end) {
             throw new IllegalArgumentException("start=" + start + " end=" + end);
         }
-        boolean ds = true;
-        String format = format(ds, gprobs);
+        VcfRecBuilder vrb = new VcfRecBuilder(4*alProbs.nSamples());
         for (int m=start; m<end; ++m) {
             Marker marker = alProbs.marker(m);
-            boolean isImputed = alProbs.isImputed(m);
-            GprobsStatistics gps = new GprobsStatistics(alProbs, m);
-            String info = info(gps, isImputed);
-            printFixedFields(marker, info, format, out);
+            vrb.reset(marker, printDS, printGP);
+            double[] a1 = new double[marker.nAlleles()];
+            double[] a2 = new double[marker.nAlleles()];
             for (int sample=0, n=alProbs.nSamples(); sample<n; ++sample) {
-                printGTandDose(alProbs, m, sample, ds, out);
-                if (gprobs) {
-                     printGP(alProbs, m, sample, out);
+                for (int j=0; j<a1.length; ++j) {
+                    a1[j] = alProbs.alProb1(m, sample, j);
+                    a2[j] = alProbs.alProb2(m, sample, j);
                 }
+                vrb.addSampleData(a1, a2);
             }
-            out.println();
+            vrb.writeRec(out, isImputed[m]);
         }
     }
 
     /**
-     * Writes the specified genotype data as VCF records to the specified
+     * Writes the data in alProbs for markers with index between
+     * {@code start} (inclusive) and {@code end} (exclusive) to the specified
      * {@code PrintWriter}.
-     * @param alProbs the sample haplotype pairs
+     * @param alProbs the estimated haplotype allele probabilities
+     * @param isImputed an array of length {@code alProbs.nMarkers()}
+     * whose {@code j}-th element is {@code true} if the corresponding
+     * marker is imputed, and {@code false} otherwise
      * @param start the starting marker index (inclusive)
      * @param end the ending marker index (exclusive)
-     * @param out the {@code PrintWriter} to which VCF records will
-     * be written
-     *
+     * @param printDS {@code true} if the DS field should be printed, and
+     * {@code false} otherwise
+     * @param printGP {@code true} if the GP field should be printed, and
+     * {@code false} otherwise
+     * @param out the {@code PrintWriter} to which VCF records will be written
+     * @throws IllegalArgumentException if
+     * {@code isImputed.length != alProbs.nMarkers()}
      * @throws IndexOutOfBoundsException if
      * {@code (start < 0 || start > end || end > alProbs.nMarkers())}
-     * @throws NullPointerException if {@code haps == null || out == null}
+     * @throws NullPointerException if
+     * {@code alProbs == null || isImputed == null || out == null}
      */
-    public static void appendRecords(AlleleProbs alProbs, int start, int end,
-            PrintWriter out) {
-        boolean ds = false;
-        boolean gp = false;
-        String info = Const.MISSING_DATA_STRING;
-        String format = format(ds, gp);
+    public static void blockedAppendRecords(AlleleProbs alProbs,
+            boolean[] isImputed, int start, int end,
+            boolean printDS, boolean printGP, PrintWriter out) {
+        if (isImputed.length != alProbs.nMarkers()) {
+            throw new IllegalArgumentException("inconsistent data");
+        }
         if (start > end) {
             throw new IllegalArgumentException("start=" + start + " end=" + end);
         }
-        for (int m=start; m<end; ++m) {
-            Marker marker = alProbs.marker(m);
-            printFixedFields(marker, info, format, out);
-            for (int sample=0, n=alProbs.nSamples(); sample<n; ++sample) {
-                printGTandDose(alProbs, m, sample, ds, out);
-            }
-            out.println();
-        }
-    }
-
-    private static void printGTandDose(AlleleProbs alProbs, int marker, int
-            sample, boolean ds, PrintWriter out) {
-        out.print(Const.tab);
-        out.print(alProbs.allele1(marker, sample));
-        out.append(Const.phasedSep);
-        out.print(alProbs.allele2(marker, sample));
-        if (ds) {
-            int nAlleles = alProbs.marker(marker).nAlleles();
-            for (int j = 1; j < nAlleles; ++j) {
-                float p1 = alProbs.alProb1(marker, sample, j);
-                float p2 = alProbs.alProb2(marker, sample, j);
-                out.print( (j==1) ? Const.colon : Const.comma );
-                out.print(df2.format(p1 + p2));
-            }
+        int nSamples = alProbs.nSamples();
+        int size = 8;
+        int initCapacity = 4*alProbs.nSamples();
+        VcfRecBuilder[] recBuilders = new VcfRecBuilder[size];
+        double[][] a1 = new double[size][];
+        double[][] a2 = new double[size][];
+        for (int j=0; j<recBuilders.length; ++j) {
+            recBuilders[j] = new VcfRecBuilder(initCapacity);
         }
-    }
-
-    private static void printGP(AlleleProbs alProbs, int marker, int sample,
-            PrintWriter out) {
-        int nAlleles = alProbs.marker(marker).nAlleles();
-        for (int a2=0; a2<nAlleles; ++a2) {
-            for (int a1=0; a1<=a2; ++a1) {
-                out.print((a2 == 0 && a1 == 0) ? Const.colon : Const.comma);
-                float gtProb = alProbs.gtProb(marker, sample, a1, a2);
-                if (a1 != a2) {
-                    gtProb += alProbs.gtProb(marker, sample, a2, a1);
+        for (int ii=start; ii<end; ii+=size) {
+            int ni = Math.min(end, ii+size);
+            for (int i=ii; i<ni; ++i) {
+                Marker marker = alProbs.marker(i);
+                recBuilders[i-ii].reset(marker, printDS, printGP);
+                a1[i-ii] = new double[marker.nAlleles()];
+                a2[i-ii] = new double[marker.nAlleles()];
+                for (int jj=0; jj<nSamples; jj+=size) {
+                    int nj = Math.min(jj+size, nSamples);
+                    for (int j=jj; j<nj; ++j) { // j = sample
+                        for (int k=0; k<a1[i-ii].length; ++k) {
+                            a1[i-ii][k] = alProbs.alProb1(i, j, k);
+                            a2[i-ii][k] = alProbs.alProb2(i, j, k);
+                        }
+                        recBuilders[i-ii].addSampleData(a1[i-ii], a2[i-ii]);
+                    }
                 }
-                out.print(df2.format(gtProb));
+            }
+            for (int i=ii; i<ni; ++i) {
+                recBuilders[i-ii].writeRec(out, isImputed[i]);
             }
         }
     }
@@ -355,88 +312,10 @@ public final class VcfWriter {
         out.print(Const.tab);
         out.print(Const.MISSING_DATA_CHAR); // QUAL
         out.print(Const.tab);
-        out.print(PASS);                    // FILTER
+        out.print("PASS");                    // FILTER
         out.print(Const.tab);
         out.print(Const.MISSING_DATA_CHAR); // INFO
         out.print(Const.tab);
         out.print("GT");
     }
-
-    private static void printFixedFields(GenotypeValues gv, int marker,
-            PrintWriter out) {
-        GprobsStatistics gpm = new GprobsStatistics(gv, marker);
-        double[] alleleFreq = gpm.alleleFreq();
-        out.print(gv.marker(marker));
-        out.print(Const.tab);
-        out.print(Const.MISSING_DATA_CHAR); // QUAL
-        out.print(Const.tab);
-        out.print(PASS);                    // FILTER
-        out.print(Const.tab);
-        out.print("AR2=");                  // INFO
-        out.print(df2_fixed.format(gpm.allelicR2()));
-        out.print(";DR2=");
-        out.print(df2_fixed.format(gpm.doseR2()));
-        for (int j=1; j<alleleFreq.length; ++j) {
-            out.print( (j==1) ? ";AF=" : Const.comma);
-            out.print(formatProb(alleleFreq[j]));
-        }
-        out.print(Const.tab);
-        out.print("GT:DS:GP");
-    }
-
-    private static String info(GprobsStatistics gps, boolean isImputed) {
-        double[] alleleFreq = gps.alleleFreq();
-        StringBuilder sb = new StringBuilder(20);
-        sb.append("AR2=");                  // INFO
-        sb.append(df2_fixed.format(gps.allelicR2()));
-        sb.append(";DR2=");
-        sb.append(df2_fixed.format(gps.doseR2()));
-        for (int j=1; j<alleleFreq.length; ++j) {
-            sb.append( (j==1) ? ";AF=" : Const.comma);
-            sb.append(formatProb(alleleFreq[j]));
-        }
-        if (isImputed) {
-            sb.append(";IMP");
-        }
-        return sb.toString();
-    }
-
-    private static String format(boolean ds, boolean gp) {
-        if (ds) {
-            if (gp) {
-                return "GT:DS:GP";
-            }
-            else {
-                return "GT:DS";
-            }
-        }
-        else {
-            return "GT";
-        }
-    }
-
-    private static void printFixedFields(Marker marker, String info,
-            String format, PrintWriter out) {
-        out.print(marker);
-        out.print(Const.tab);
-        out.print(Const.MISSING_DATA_CHAR); // QUAL
-        out.print(Const.tab);
-        out.print(PASS);                    // FILTER
-        out.print(Const.tab);
-        out.print(info);
-        out.print(Const.tab);
-        out.print(format);
-    }
-
-    private static String formatProb(double d) {
-        if (d>=0 && d <= 0.5) {
-            return new BigDecimal(d).round(mc2).toPlainString();
-        }
-        else if (d <= 1.0) {
-            return new BigDecimal(d-1.0).round(mc2).add(ONE).toString();
-        }
-        else {
-            throw new IllegalArgumentException(String.valueOf(d));
-        }
-    }
 }

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/beagle.git



More information about the debian-med-commit mailing list