[med-svn] [SCM] picard-tools branch, upstream, updated. upstream/1.80-1-gb78487e

Olivier Sallou olivier.sallou at debian.org
Thu Jan 3 09:47:28 UTC 2013


The following commit has been merged in the upstream branch:
commit b78487ee745a707f4c8604053e25d1526ed2751f
Author: Olivier Sallou <olivier.sallou at debian.org>
Date:   Thu Jan 3 10:34:04 2013 +0100

    Imported Upstream version 1.82

diff --git a/Picard-public.ipr b/Picard-public.ipr
index 68b3a40..fd4cd67 100644
--- a/Picard-public.ipr
+++ b/Picard-public.ipr
@@ -35,7 +35,11 @@
       <entry name="?*.tld" />
       <entry name="?*.ftl" />
     </wildcardResourcePatterns>
-    <annotationProcessing enabled="false" useClasspath="true" />
+    <annotationProcessing>
+      <profile default="true" name="Default" enabled="false">
+        <processorPath useClasspath="true" />
+      </profile>
+    </annotationProcessing>
   </component>
   <component name="CopyrightManager" default="">
     <module2copyright />
@@ -92,6 +96,21 @@
           <option name="REPORT_VARIABLES" value="true" />
           <option name="REPORT_PARAMETERS" value="true" />
         </inspection_tool>
+        <inspection_tool class="UnusedDeclaration" enabled="false" level="WARNING" enabled_by_default="false">
+          <option name="ADD_MAINS_TO_ENTRIES" value="true" />
+          <option name="ADD_APPLET_TO_ENTRIES" value="true" />
+          <option name="ADD_SERVLET_TO_ENTRIES" value="true" />
+          <option name="ADD_NONJAVA_TO_ENTRIES" value="true" />
+        </inspection_tool>
+        <inspection_tool class="groupsTestNG" enabled="true" level="WARNING" enabled_by_default="true">
+          <option name="groups">
+            <value>
+              <list size="1">
+                <item index="0" class="java.lang.String" itemvalue="unix" />
+              </list>
+            </value>
+          </option>
+        </inspection_tool>
       </profile>
     </profiles>
     <option name="PROJECT_PROFILE" value="Project Default" />
@@ -247,6 +266,15 @@
   <component name="ProjectDetails">
     <option name="projectName" value="Picard-public" />
   </component>
+  <component name="ProjectDictionaryState">
+    <dictionary name="mccowan">
+      <words>
+        <w>inferer</w>
+        <w>inferrer</w>
+        <w>phread</w>
+      </words>
+    </dictionary>
+  </component>
   <component name="ProjectKey">
     <option name="state" value="https://picard.svn.sourceforge.net/svnroot/picard/trunk/Picard-public.ipr" />
   </component>
diff --git a/build.xml b/build.xml
index 4c82fe7..e379da4 100755
--- a/build.xml
+++ b/build.xml
@@ -38,7 +38,7 @@
     <!-- Get SVN revision, if available, otherwise leave it blank. -->
     <exec executable="svnversion" outputproperty="repository.revision" failifexecutionfails="false"/>
     <property name="repository.revision" value=""/>
-    <property name="sam-version" value="1.80"/>
+    <property name="sam-version" value="1.82"/>
     <property name="picard-version" value="${sam-version}"/>
     <property name="command_tmp" value=".command_tmp"/>
     <property name="command-line-html-dir" value="${dist}/html"/>
diff --git a/src/java/net/sf/picard/cmdline/CommandLineProgram.java b/src/java/net/sf/picard/cmdline/CommandLineProgram.java
index 2860fe4..df6192d 100644
--- a/src/java/net/sf/picard/cmdline/CommandLineProgram.java
+++ b/src/java/net/sf/picard/cmdline/CommandLineProgram.java
@@ -247,10 +247,21 @@ public abstract class CommandLineProgram {
         return commandLineParser;
     }
 
+    /**
+     * @return This is a little-used version string that can be put in the @Usage annotation
+     */
     public String getProgramVersion() {
         return commandLineParser.getProgramVersion();
     }
 
+
+    /**
+     * @return Version stored in the manifest of the jarfile.
+     */
+    public String getVersion() {
+        return getCommandLineParser().getVersion();
+    }
+
     public String getCommandLine() {
         return commandLine;
     }
diff --git a/src/java/net/sf/picard/fastq/FastqRecord.java b/src/java/net/sf/picard/fastq/FastqRecord.java
index 74fdd0c..e4830c6 100755
--- a/src/java/net/sf/picard/fastq/FastqRecord.java
+++ b/src/java/net/sf/picard/fastq/FastqRecord.java
@@ -28,14 +28,16 @@ package net.sf.picard.fastq;
  */
 public class FastqRecord {
 
-    private String seqHeaderPrefix;
-    private String seqLine;
-    private String qualHeaderPrefix;
-    private String qualLine;
+    private final String seqHeaderPrefix;
+    private final String seqLine;
+    private final String qualHeaderPrefix;
+    private final String qualLine;
 
     public FastqRecord(final String seqHeaderPrefix, final String seqLine, final String qualHeaderPrefix, final String qualLine) {
-        if (seqHeaderPrefix.length() > 0) this.seqHeaderPrefix = seqHeaderPrefix;
-        if (qualHeaderPrefix.length() > 0) this.qualHeaderPrefix = qualHeaderPrefix;
+        if (seqHeaderPrefix != null && seqHeaderPrefix.length() > 0) this.seqHeaderPrefix = seqHeaderPrefix;
+        else this.seqHeaderPrefix = null;
+        if (qualHeaderPrefix != null && qualHeaderPrefix.length() > 0) this.qualHeaderPrefix = qualHeaderPrefix;
+        else this.qualHeaderPrefix = null;
         this.seqLine = seqLine ;
         this.qualLine = qualLine ;
     }
diff --git a/src/java/net/sf/picard/sam/AbstractAlignmentMerger.java b/src/java/net/sf/picard/sam/AbstractAlignmentMerger.java
index 214ff4c..692a955 100644
--- a/src/java/net/sf/picard/sam/AbstractAlignmentMerger.java
+++ b/src/java/net/sf/picard/sam/AbstractAlignmentMerger.java
@@ -181,6 +181,20 @@ public abstract class AbstractAlignmentMerger {
     }
 
     /**
+     * Do this unconditionally, not just for aligned records, for two reasons:
+     * - An unaligned read has been processed by the aligner, so it is more truthful.
+     * - When chaining additional PG records, having all the records in the output file refer to the same PG
+     *   record means that only one chain will need to be created, rather than a chain for the mapped reads
+     *   and a separate chain for the unmapped reads.
+     */
+    private void maybeSetPgTag(final SAMRecord rec) {
+        if (this.programRecord != null) {
+            rec.setAttribute(ReservedTagConstants.PROGRAM_GROUP_ID, this.programRecord.getProgramGroupId());
+        }
+    }
+    /**
+
+    /**
      * Merges the alignment data with the non-aligned records from the source BAM file.
      */
     public void mergeAlignment() {
@@ -206,12 +220,15 @@ public abstract class AbstractAlignmentMerger {
         while (unmappedIterator.hasNext()) {
             // Load next unaligned read or read pair.
             final SAMRecord rec = unmappedIterator.next();
+
             rec.setHeader(this.header);
+            maybeSetPgTag(rec);
 
             final SAMRecord secondOfPair;
             if (rec.getReadPairedFlag()) {
                 secondOfPair = unmappedIterator.next();
                 secondOfPair.setHeader(this.header);
+                maybeSetPgTag(secondOfPair);
 
                 // Validate that paired reads arrive as first of pair followed by second of pair
                 if (!rec.getReadName().equals(secondOfPair.getReadName()))
@@ -362,10 +379,6 @@ public abstract class AbstractAlignmentMerger {
     private void transferAlignmentInfoToFragment(final SAMRecord unaligned, final SAMRecord aligned) {
         setValuesFromAlignment(unaligned, aligned);
         updateCigarForTrimmedOrClippedBases(unaligned, aligned);
-        if (this.programRecord != null) {
-            unaligned.setAttribute(ReservedTagConstants.PROGRAM_GROUP_ID,
-                this.programRecord.getProgramGroupId());
-        }
         if (SAMUtils.cigarMapsNoBasesToRef(unaligned.getCigar())) {
             SAMUtils.makeReadUnmapped(unaligned);
         }
diff --git a/src/java/net/sf/picard/sam/FastqToSam.java b/src/java/net/sf/picard/sam/FastqToSam.java
index c23dd93..0d95806 100644
--- a/src/java/net/sf/picard/sam/FastqToSam.java
+++ b/src/java/net/sf/picard/sam/FastqToSam.java
@@ -31,10 +31,7 @@ import net.sf.picard.cmdline.Usage;
 import net.sf.picard.fastq.FastqReader;
 import net.sf.picard.fastq.FastqRecord;
 import net.sf.picard.io.IoUtil;
-import net.sf.picard.util.FastqQualityFormat;
-import net.sf.picard.util.Log;
-import net.sf.picard.util.ProgressLogger;
-import net.sf.picard.util.SolexaQualityConverter;
+import net.sf.picard.util.*;
 import net.sf.samtools.*;
 import net.sf.samtools.SAMFileHeader.SortOrder;
 import net.sf.samtools.util.Iso8601Date;
@@ -66,7 +63,7 @@ public class FastqToSam extends CommandLineProgram {
 
     @Option(shortName="V", doc="A value describing how the quality values are encoded in the fastq.  Either Solexa for pre-pipeline 1.3 " +
             "style scores (solexa scaling + 66), Illumina for pipeline 1.3 and above (phred scaling + 64) or Standard for phred scaled " +
-            "scores with a character shift of 33.")
+            "scores with a character shift of 33.  If this value is not specified, the quality format will be detected automatically.", optional = true)
     public FastqQualityFormat QUALITY_FORMAT;
 
     @Option(doc="Output SAM/BAM file. ", shortName=StandardOptionDefinitions.OUTPUT_SHORT_NAME) 
@@ -120,6 +117,23 @@ public class FastqToSam extends CommandLineProgram {
 
     /* Simply invokes the right method for unpaired or paired data. */
     protected int doWork() {
+        if (QUALITY_FORMAT == null) {
+            final QualityEncodingDetector detector = new QualityEncodingDetector();
+            final FastqReader reader = new FastqReader(FASTQ);
+            if (FASTQ2 == null) {
+                detector.add(QualityEncodingDetector.DEFAULT_MAX_RECORDS_TO_ITERATE, reader);
+            } else {
+                final FastqReader reader2 = new FastqReader(FASTQ2);       
+                detector.add(QualityEncodingDetector.DEFAULT_MAX_RECORDS_TO_ITERATE, reader, reader2);
+                reader2.close();
+            }
+            reader.close();
+            
+            QUALITY_FORMAT = detector.generateBestGuess(QualityEncodingDetector.FileContext.FASTQ);
+            if (detector.isDeterminationAmbiguous())
+                LOG.warn("Making ambiguous determination about fastq's quality encoding; more than one format possible based on observed qualities.");
+            LOG.info(String.format("Auto-detected quality format as: %s.", QUALITY_FORMAT));
+        }
         final int readCount = (FASTQ2 == null) ?  doUnpaired() : doPaired();
         LOG.info("Processed " + readCount + " fastq reads");
         return 0;
diff --git a/src/java/net/sf/picard/sam/MarkDuplicates.java b/src/java/net/sf/picard/sam/MarkDuplicates.java
index 265afcc..3dd9ab1 100644
--- a/src/java/net/sf/picard/sam/MarkDuplicates.java
+++ b/src/java/net/sf/picard/sam/MarkDuplicates.java
@@ -69,6 +69,26 @@ public class MarkDuplicates extends AbstractDuplicateFindingAlgorithm {
     @Option(shortName="M", doc="File to write duplication metrics to")
     public File METRICS_FILE;
 
+    @Option(shortName=StandardOptionDefinitions.PROGRAM_RECORD_ID_SHORT_NAME,
+            doc="The program record ID for the @PG record(s) created by this program.  " +
+                    "Set to null to disable PG record creation.  This string may have a suffix appended to avoid " +
+            "collision with other program record IDs.",
+            optional=true)
+    public String PROGRAM_RECORD_ID = "MarkDuplicates";
+
+    @Option(shortName="PG_VERSION",
+            doc="Value of VN tag of PG record to be created. If not specified, the version will be detected automatically.",
+            optional=true)
+    public String PROGRAM_GROUP_VERSION;
+
+    @Option(shortName="PG_COMMAND",
+            doc="Value of CL tag of PG record to be created. If not supplied the command line will be detected automatically.",
+            optional=true)
+    public String PROGRAM_GROUP_COMMAND_LINE;
+
+    @Option(shortName="PG_NAME", doc="Value of PN tag of PG record to be created.")
+    public String PROGRAM_GROUP_NAME = "MarkDuplicates";
+
     @Option(doc="Comment(s) to include in the output file's header.", optional=true, shortName="CO")
     public List<String> COMMENT = new ArrayList<String>();
 
@@ -102,6 +122,13 @@ public class MarkDuplicates extends AbstractDuplicateFindingAlgorithm {
     // Variables used for optical duplicate detection and tracking
     private final Histogram<Short> opticalDupesByLibraryId = new Histogram<Short>();
 
+    // All PG IDs seen in merged input files in first pass.  These are gather for two reasons:
+    // - to know how many different PG records to create to represent this program invocation.
+    // - to know what PG IDs are already used to avoid collisions when creating new ones.
+    // Note that if there are one or more records that do not have a PG tag, then a null value
+    // will be stored in this set.
+    private final Set<String> pgIdsSeen = new HashSet<String>();
+
     /** Stock main method. */
     public static void main(final String[] args) {
         System.exit(new MarkDuplicates().instanceMain(args));
@@ -134,6 +161,33 @@ public class MarkDuplicates extends AbstractDuplicateFindingAlgorithm {
         final SAMFileHeader outputHeader = header.clone();
         outputHeader.setSortOrder(SAMFileHeader.SortOrder.coordinate);
         for (final String comment : COMMENT) outputHeader.addComment(comment);
+
+        // Key: previous PG ID on a SAM Record (or null).  Value: New PG ID to replace it.
+        final Map<String, String> chainedPgIds;
+        // Generate new PG record(s)
+        if (PROGRAM_RECORD_ID != null) {
+            final PgIdGenerator pgIdGenerator = new PgIdGenerator(outputHeader);
+            if (PROGRAM_GROUP_VERSION == null) {
+                PROGRAM_GROUP_VERSION = this.getVersion();
+            }
+            if (PROGRAM_GROUP_COMMAND_LINE == null) {
+                PROGRAM_GROUP_COMMAND_LINE = this.getCommandLine();
+            }
+            chainedPgIds = new HashMap<String, String>();
+            for (final String existingId : pgIdsSeen) {
+                final String newPgId = pgIdGenerator.getNonCollidingId(PROGRAM_RECORD_ID);
+                chainedPgIds.put(existingId, newPgId);
+                final SAMProgramRecord programRecord = new SAMProgramRecord(newPgId);
+                programRecord.setProgramVersion(PROGRAM_GROUP_VERSION);
+                programRecord.setCommandLine(PROGRAM_GROUP_COMMAND_LINE);
+                programRecord.setProgramName(PROGRAM_GROUP_NAME);
+                programRecord.setPreviousProgramGroupId(existingId);
+                outputHeader.addProgramRecord(programRecord);
+            }
+        } else {
+            chainedPgIds = null;
+        }
+
         final SAMFileWriter out = new SAMFileWriterFactory().makeSAMOrBAMWriter(outputHeader,
                                                                                 true,
                                                                                 OUTPUT);
@@ -206,6 +260,9 @@ public class MarkDuplicates extends AbstractDuplicateFindingAlgorithm {
                 // do nothing
             }
             else {
+                if (PROGRAM_RECORD_ID != null) {
+                    rec.setAttribute(SAMTag.PG.name(), chainedPgIds.get(rec.getStringAttribute(SAMTag.PG.name())));
+                }
                 out.addAlignment(rec);
                 progress.record(rec);
             }
@@ -327,6 +384,13 @@ public class MarkDuplicates extends AbstractDuplicateFindingAlgorithm {
 
         while (iterator.hasNext()) {
             final SAMRecord rec = iterator.next();
+
+            // This doesn't have anything to do with building sorted ReadEnd lists, but it can be done in the same pass
+            // over the input
+            if (PROGRAM_RECORD_ID != null) {
+                pgIdsSeen.add(rec.getStringAttribute(SAMTag.PG.name()));
+            }
+
             if (rec.getReadUnmappedFlag()) {
                 if (rec.getReferenceIndex() == -1) {
                     // When we hit the unmapped reads with no coordinate, no reason to continue.
@@ -607,7 +671,7 @@ public class MarkDuplicates extends AbstractDuplicateFindingAlgorithm {
         final boolean[] opticalDuplicateFlags = findOpticalDuplicates(list, OPTICAL_DUPLICATE_PIXEL_DISTANCE);
 
         int opticalDuplicates = 0;
-        for (boolean b: opticalDuplicateFlags) if (b) ++opticalDuplicates;
+        for (final boolean b: opticalDuplicateFlags) if (b) ++opticalDuplicates;
         if (opticalDuplicates > 0) {
             this.opticalDupesByLibraryId.increment(list.get(0).libraryId, opticalDuplicates);
         }
@@ -658,4 +722,38 @@ public class MarkDuplicates extends AbstractDuplicateFindingAlgorithm {
             return retval;
         }
     }
+
+    static class PgIdGenerator {
+        private int recordCounter;
+
+        private final Set<String> idsThatAreAlreadyTaken = new HashSet<String>();
+
+        PgIdGenerator(final SAMFileHeader header) {
+            for (final SAMProgramRecord pgRecord : header.getProgramRecords()) {
+                idsThatAreAlreadyTaken.add(pgRecord.getProgramGroupId());
+            }
+            recordCounter = idsThatAreAlreadyTaken.size();
+        }
+
+        String getNonCollidingId(final String recordId) {
+            if(!idsThatAreAlreadyTaken.contains(recordId)) {
+                // don't remap 1st record. If there are more records
+                // with this id, they will be remapped in the 'else'.
+                idsThatAreAlreadyTaken.add(recordId);
+                ++recordCounter;
+                return recordId;
+            } else {
+                String newId;
+                // Below we tack on one of roughly 1.7 million possible 4 digit base36 at random. We do this because
+                // our old process of just counting from 0 upward and adding that to the previous id led to 1000s of
+                // calls idsThatAreAlreadyTaken.contains() just to resolve 1 collision when merging 1000s of similarly
+                // processed bams.
+                while(idsThatAreAlreadyTaken.contains(newId = recordId + "." + SamFileHeaderMerger.positiveFourDigitBase36Str(recordCounter++)));
+
+                idsThatAreAlreadyTaken.add( newId );
+                return newId;
+            }
+
+        }
+    }
 }
diff --git a/src/java/net/sf/picard/sam/SamFileValidator.java b/src/java/net/sf/picard/sam/SamFileValidator.java
index 1fe5e6c..2aa93fa 100644
--- a/src/java/net/sf/picard/sam/SamFileValidator.java
+++ b/src/java/net/sf/picard/sam/SamFileValidator.java
@@ -30,9 +30,7 @@ import net.sf.picard.metrics.MetricsFile;
 import net.sf.picard.reference.ReferenceSequence;
 import net.sf.picard.reference.ReferenceSequenceFile;
 import net.sf.picard.reference.ReferenceSequenceFileWalker;
-import net.sf.picard.util.Histogram;
-import net.sf.picard.util.Log;
-import net.sf.picard.util.ProgressLogger;
+import net.sf.picard.util.*;
 import net.sf.samtools.*;
 import net.sf.samtools.SAMFileReader.ValidationStringency;
 import net.sf.samtools.SAMValidationError.Type;
@@ -48,15 +46,15 @@ import java.util.*;
  * <li>checks sam file header for read groups</li>
  * <li>for each sam record
  * <ul>
- *     <li>reports error detected by SAMRecord.isValid()</li>
- *     <li>validates NM (nucleotide differences) exists and matches reality</li>
- *     <li>validates mate fields agree with data in the mate record</li>
+ * <li>reports error detected by SAMRecord.isValid()</li>
+ * <li>validates NM (nucleotide differences) exists and matches reality</li>
+ * <li>validates mate fields agree with data in the mate record</li>
  * </ul>
  * </li>
  * </ul>
  *
- * @see SAMRecord#isValid()
  * @author Doug Voet
+ * @see SAMRecord#isValid()
  */
 public class SamFileValidator {
     private Histogram<Type> errorsByType = new Histogram<Type>();
@@ -80,7 +78,9 @@ public class SamFileValidator {
         this.maxTempFiles = maxTempFiles;
     }
 
-    /** Sets one or more error types that should not be reported on. */
+    /**
+     * Sets one or more error types that should not be reported on.
+     */
     public void setErrorsToIgnore(final Collection<Type> types) {
         if (!types.isEmpty()) {
             this.errorsToIgnore = EnumSet.copyOf(types);
@@ -93,7 +93,7 @@ public class SamFileValidator {
 
     /**
      * Outputs validation summary report to out.
-     * 
+     *
      * @param samReader records to validate
      * @param reference if null, NM tag validation is skipped
      * @return boolean  true if there are no validation errors, otherwise false
@@ -123,10 +123,10 @@ public class SamFileValidator {
 
     /**
      * Outputs validation error details to out.
-     * 
+     *
      * @param samReader records to validate
      * @param reference if null, NM tag validation is skipped
-     * processing will stop after this threshold has been reached
+     *                  processing will stop after this threshold has been reached
      * @return boolean  true if there are no validation errors, otherwise false
      */
     public boolean validateSamFileVerbose(final SAMFileReader samReader, final ReferenceSequenceFile reference) {
@@ -174,16 +174,16 @@ public class SamFileValidator {
             samReader.setValidationStringency(ValidationStringency.SILENT);
             validateHeader(samReader.getFileHeader());
             orderChecker = new SAMSortOrderChecker(samReader.getFileHeader().getSortOrder());
-            validateSamRecords(samReader, samReader.getFileHeader());
+            validateSamRecordsAndQualityFormat(samReader, samReader.getFileHeader());
             validateUnmatchedPairs();
-            if (validateIndex){
+            if (validateIndex) {
                 try {
-                   BamIndexValidator.exhaustivelyTestIndex(samReader);
-                } catch (Exception e){
+                    BamIndexValidator.exhaustivelyTestIndex(samReader);
+                } catch (Exception e) {
                     addError(new SAMValidationError(Type.INVALID_INDEX_FILE_POINTER, e.getMessage(), null));
                 }
             }
-            
+
             if (errorsByType.isEmpty()) {
                 out.println("No errors found");
             }
@@ -202,7 +202,7 @@ public class SamFileValidator {
             // For the coordinate-sorted map, need to detect mate pairs in which the mateReferenceIndex on one end
             // does not match the readReference index on the other end, so the pairs weren't united and validated.
             inMemoryPairMap = new InMemoryPairEndInfoMap();
-            CloseableIterator<Map.Entry<String, PairEndInfo>> it = ((CoordinateSortedPairEndInfoMap)pairEndInfoByName).iterator();
+            CloseableIterator<Map.Entry<String, PairEndInfo>> it = ((CoordinateSortedPairEndInfoMap) pairEndInfoByName).iterator();
             while (it.hasNext()) {
                 Map.Entry<String, PairEndInfo> entry = it.next();
                 PairEndInfo pei = inMemoryPairMap.remove(entry.getValue().readReferenceIndex, entry.getKey());
@@ -219,20 +219,28 @@ public class SamFileValidator {
             }
             it.close();
         } else {
-            inMemoryPairMap = (InMemoryPairEndInfoMap)pairEndInfoByName;
+            inMemoryPairMap = (InMemoryPairEndInfoMap) pairEndInfoByName;
         }
         // At this point, everything in InMemoryMap is a read marked as a pair, for which a mate was not found.
-        for (final Map.Entry<String, PairEndInfo> entry: inMemoryPairMap) {
+        for (final Map.Entry<String, PairEndInfo> entry : inMemoryPairMap) {
             addError(new SAMValidationError(Type.MATE_NOT_FOUND, "Mate not found for paired read", entry.getKey()));
         }
     }
 
-    private void validateSamRecords(final Iterable<SAMRecord> samRecords, final SAMFileHeader header) {
+    /**
+     * SAM record and quality format validations are combined into a single method because validation must be completed
+     * in only a single pass of the SamRecords (because a SamReader's iterator() method may not return the same
+     * records on a subsequent call).
+     */
+    private void validateSamRecordsAndQualityFormat(final Iterable<SAMRecord> samRecords, final SAMFileHeader header) {
         SAMRecordIterator iter = (SAMRecordIterator) samRecords.iterator();
         final ProgressLogger progress = new ProgressLogger(log, 10000000, "Validated Read");
+        final QualityEncodingDetector qualityDetector = new QualityEncodingDetector();
         try {
-            while(iter.hasNext()){
-                SAMRecord record = iter.next();
+            while (iter.hasNext()) {
+                final SAMRecord record = iter.next();
+
+                qualityDetector.add(record);
 
                 final long recordNumber = progress.getCount() + 1;
                 final Collection<SAMValidationError> errors = record.isValid();
@@ -242,7 +250,7 @@ public class SamFileValidator {
                         addError(error);
                     }
                 }
-                
+
                 validateMateFields(record, recordNumber);
                 validateSortOrder(record, recordNumber);
                 validateReadGroup(record, header);
@@ -259,6 +267,17 @@ public class SamFileValidator {
                 }
                 progress.record(record);
             }
+
+            try {
+                if (progress.getCount() > 0) { // Avoid exception being thrown as a result of no qualities being read
+                    final FastqQualityFormat format = qualityDetector.generateBestGuess(QualityEncodingDetector.FileContext.SAM);
+                    if (format != FastqQualityFormat.Standard) {
+                        addError(new SAMValidationError(Type.INVALID_QUALITY_FORMAT, String.format("Detected %s quality score encoding, but expected %s.", format, FastqQualityFormat.Standard), null));
+                    }
+                }
+            } catch (PicardException e) {
+                addError(new SAMValidationError(Type.INVALID_QUALITY_FORMAT, e.getMessage(), null));
+            }
         } catch (SAMFormatException e) {
             // increment record number because the iterator behind the SAMFileReader
             // reads one record ahead so we will get this failure one record ahead
@@ -277,8 +296,7 @@ public class SamFileValidator {
         if (rg == null) {
             addError(new SAMValidationError(Type.RECORD_MISSING_READ_GROUP,
                     "A record is missing a read group", record.getReadName()));
-        }
-        else if (!header.getReadGroups().contains(rg)) {
+        } else if (!header.getReadGroups().contains(rg)) {
             addError(new SAMValidationError(Type.READ_GROUP_NOT_FOUND,
                     "A record has a read group not found in the header: ",
                     record.getReadName() + ", " + rg.getReadGroupId()));
@@ -299,7 +317,7 @@ public class SamFileValidator {
     }
 
     private void validateSecondaryBaseCalls(final SAMRecord record, final long recordNumber) {
-        final String e2 = (String)record.getAttribute(SAMTag.E2.name());
+        final String e2 = (String) record.getAttribute(SAMTag.E2.name());
         if (e2 != null) {
             if (e2.length() != record.getReadLength()) {
                 addError(new SAMValidationError(Type.MISMATCH_READ_LENGTH_AND_E2_LENGTH,
@@ -315,13 +333,13 @@ public class SamFileValidator {
                 if (SequenceUtil.basesEqual(bases[i], secondaryBases[i])) {
                     addError(new SAMValidationError(Type.E2_BASE_EQUALS_PRIMARY_BASE,
                             String.format("Secondary base call  (%c) == primary base call (%c)",
-                                    (char)secondaryBases[i], (char)bases[i]),
+                                    (char) secondaryBases[i], (char) bases[i]),
                             record.getReadName(), recordNumber));
                     break;
                 }
             }
         }
-        final String u2 = (String)record.getAttribute(SAMTag.U2.name());
+        final String u2 = (String) record.getAttribute(SAMTag.U2.name());
         if (u2 != null && u2.length() != record.getReadLength()) {
             addError(new SAMValidationError(Type.MISMATCH_READ_LENGTH_AND_U2_LENGTH,
                     String.format("U2 tag length (%d) != read length (%d)", u2.length(), record.getReadLength()),
@@ -352,18 +370,18 @@ public class SamFileValidator {
         final SAMRecord prev = orderChecker.getPreviousRecord();
         if (!orderChecker.isSorted(record)) {
             addError(new SAMValidationError(
-                    Type.RECORD_OUT_OF_ORDER, 
+                    Type.RECORD_OUT_OF_ORDER,
                     String.format(
                             "The record is out of [%s] order, prior read name [%s], prior coodinates [%d:%d]",
                             record.getHeader().getSortOrder().name(),
                             prev.getReadName(),
                             prev.getReferenceIndex(),
                             prev.getAlignmentStart()),
-                    record.getReadName(), 
+                    record.getReadName(),
                     recordNumber));
         }
     }
-    
+
     private void init(final ReferenceSequenceFile reference, final SAMFileHeader header) {
         if (header.getSortOrder() == SAMFileHeader.SortOrder.coordinate) {
             this.pairEndInfoByName = new CoordinateSortedPairEndInfoMap();
@@ -386,8 +404,8 @@ public class SamFileValidator {
             final Integer tagNucleotideDiffs = record.getIntegerAttribute(ReservedTagConstants.NM);
             if (tagNucleotideDiffs == null) {
                 addError(new SAMValidationError(
-                        Type.MISSING_TAG_NM, 
-                        "NM tag (nucleotide differences) is missing", 
+                        Type.MISSING_TAG_NM,
+                        "NM tag (nucleotide differences) is missing",
                         record.getReadName(),
                         recordNumber));
             } else if (refFileWalker != null) {
@@ -397,27 +415,27 @@ public class SamFileValidator {
 
                 if (!tagNucleotideDiffs.equals(actualNucleotideDiffs)) {
                     addError(new SAMValidationError(
-                            Type.INVALID_TAG_NM, 
+                            Type.INVALID_TAG_NM,
                             "NM tag (nucleotide differences) in file [" + tagNucleotideDiffs +
-                            "] does not match reality [" + actualNucleotideDiffs + "]", 
+                                    "] does not match reality [" + actualNucleotideDiffs + "]",
                             record.getReadName(),
                             recordNumber));
                 }
             }
         }
     }
-    
+
     private void validateMateFields(final SAMRecord record, final long recordNumber) {
         if (!record.getReadPairedFlag() || record.getNotPrimaryAlignmentFlag()) {
             return;
         }
-        
+
         final PairEndInfo pairEndInfo = pairEndInfoByName.remove(record.getReferenceIndex(), record.getReadName());
         if (pairEndInfo == null) {
             pairEndInfoByName.put(record.getMateReferenceIndex(), record.getReadName(), new PairEndInfo(record, recordNumber));
         } else {
             final List<SAMValidationError> errors =
-                pairEndInfo.validateMates(new PairEndInfo(record, recordNumber), record.getReadName());
+                    pairEndInfo.validateMates(new PairEndInfo(record, recordNumber), record.getReadName());
             for (final SAMValidationError error : errors) {
                 addError(error);
             }
@@ -443,8 +461,8 @@ public class SamFileValidator {
             addError(new SAMValidationError(Type.MISSING_READ_GROUP, "Read groups is empty", null));
         }
         List<SAMProgramRecord> pgs = fileHeader.getProgramRecords();
-        for (int i = 0; i < pgs.size()-1; i++) {
-            for (int j=i+1; j < pgs.size(); j++) {
+        for (int i = 0; i < pgs.size() - 1; i++) {
+            for (int j = i + 1; j < pgs.size(); j++) {
                 if (pgs.get(i).getProgramGroupId().equals(pgs.get(j).getProgramGroupId())) {
                     addError(new SAMValidationError(Type.DUPLICATE_PROGRAM_GROUP_ID, "Duplicate " +
                             "program group id: " + pgs.get(i).getProgramGroupId(), null));
@@ -453,8 +471,8 @@ public class SamFileValidator {
         }
 
         List<SAMReadGroupRecord> rgs = fileHeader.getReadGroups();
-        for (int i = 0; i < rgs.size()-1; i++) {
-            for (int j=i+1; j < rgs.size(); j++) {
+        for (int i = 0; i < rgs.size() - 1; i++) {
+            for (int j = i + 1; j < rgs.size(); j++) {
                 if (rgs.get(i).getReadGroupId().equals(rgs.get(j).getReadGroupId())) {
                     addError(new SAMValidationError(Type.DUPLICATE_READ_GROUP_ID, "Duplicate " +
                             "read group id: " + rgs.get(i).getReadGroupId(), null));
@@ -482,7 +500,8 @@ public class SamFileValidator {
 
     /**
      * Control verbosity
-     * @param verbose True in order to emit a message per error or warning.
+     *
+     * @param verbose          True in order to emit a message per error or warning.
      * @param maxVerboseOutput If verbose, emit no more than this many messages.  Ignored if !verbose.
      */
     public void setVerbose(final boolean verbose, final int maxVerboseOutput) {
@@ -490,8 +509,13 @@ public class SamFileValidator {
         this.maxVerboseOutput = maxVerboseOutput;
     }
 
-    public boolean isBisulfiteSequenced() { return bisulfiteSequenced; }
-    public void setBisulfiteSequenced(boolean bisulfiteSequenced) { this.bisulfiteSequenced = bisulfiteSequenced; }
+    public boolean isBisulfiteSequenced() {
+        return bisulfiteSequenced;
+    }
+
+    public void setBisulfiteSequenced(boolean bisulfiteSequenced) {
+        this.bisulfiteSequenced = bisulfiteSequenced;
+    }
 
     public SamFileValidator setValidateIndex(boolean validateIndex) {
         // The SAMFileReader must also have IndexCaching enabled to have the index validated,
@@ -502,8 +526,8 @@ public class SamFileValidator {
 
     public static class ValidationMetrics extends MetricBase {
     }
-    
-    /** 
+
+    /**
      * This class is used so we don't have to store the entire SAMRecord in memory while we wait
      * to find a record's mate and also to store the record number.
      */
@@ -519,17 +543,17 @@ public class SamFileValidator {
         private final boolean mateUnmappedFlag;
 
         private final boolean firstOfPairFlag;
-        
+
         private final long recordNumber;
-        
+
         public PairEndInfo(final SAMRecord record, final long recordNumber) {
             this.recordNumber = recordNumber;
-            
+
             this.readAlignmentStart = record.getAlignmentStart();
             this.readNegStrandFlag = record.getReadNegativeStrandFlag();
             this.readReferenceIndex = record.getReferenceIndex();
             this.readUnmappedFlag = record.getReadUnmappedFlag();
-            
+
             this.mateAlignmentStart = record.getMateAlignmentStart();
             this.mateNegStrandFlag = record.getMateNegativeStrandFlag();
             this.mateReferenceIndex = record.getMateReferenceIndex();
@@ -559,7 +583,7 @@ public class SamFileValidator {
             validateMateFields(mate, this, readName, errors);
             // Validations that should not be repeated on both ends
             if (this.firstOfPairFlag == mate.firstOfPairFlag) {
-                final String whichEnd = this.firstOfPairFlag? "first": "second";
+                final String whichEnd = this.firstOfPairFlag ? "first" : "second";
                 errors.add(new SAMValidationError(
                         Type.MATES_ARE_SAME_END,
                         "Both mates are marked as " + whichEnd + " of pair",
@@ -569,40 +593,42 @@ public class SamFileValidator {
             }
             return errors;
         }
-        
+
         private void validateMateFields(final PairEndInfo end1, final PairEndInfo end2, final String readName, final List<SAMValidationError> errors) {
             if (end1.mateAlignmentStart != end2.readAlignmentStart) {
                 errors.add(new SAMValidationError(
-                        Type.MISMATCH_MATE_ALIGNMENT_START, 
-                        "Mate alignment does not match alignment start of mate", 
+                        Type.MISMATCH_MATE_ALIGNMENT_START,
+                        "Mate alignment does not match alignment start of mate",
                         readName,
                         end1.recordNumber));
             }
             if (end1.mateNegStrandFlag != end2.readNegStrandFlag) {
                 errors.add(new SAMValidationError(
-                        Type.MISMATCH_FLAG_MATE_NEG_STRAND, 
-                        "Mate negative strand flag does not match read negative strand flag of mate", 
+                        Type.MISMATCH_FLAG_MATE_NEG_STRAND,
+                        "Mate negative strand flag does not match read negative strand flag of mate",
                         readName,
                         end1.recordNumber));
             }
             if (end1.mateReferenceIndex != end2.readReferenceIndex) {
                 errors.add(new SAMValidationError(
-                        Type.MISMATCH_MATE_REF_INDEX, 
-                        "Mate reference index (MRNM) does not match reference index of mate", 
+                        Type.MISMATCH_MATE_REF_INDEX,
+                        "Mate reference index (MRNM) does not match reference index of mate",
                         readName,
                         end1.recordNumber));
             }
             if (end1.mateUnmappedFlag != end2.readUnmappedFlag) {
                 errors.add(new SAMValidationError(
-                        Type.MISMATCH_FLAG_MATE_UNMAPPED, 
-                        "Mate unmapped flag does not match read unmapped flag of mate", 
+                        Type.MISMATCH_FLAG_MATE_UNMAPPED,
+                        "Mate unmapped flag does not match read unmapped flag of mate",
                         readName,
                         end1.recordNumber));
             }
         }
     }
-    
-    /** Thrown in addError indicating that maxVerboseOutput has been exceeded and processing should stop */
+
+    /**
+     * Thrown in addError indicating that maxVerboseOutput has been exceeded and processing should stop
+     */
     private static class MaxOutputExceededException extends PicardException {
         MaxOutputExceededException() {
             super("maxVerboseOutput exceeded.");
@@ -611,7 +637,9 @@ public class SamFileValidator {
 
     interface PairEndInfoMap extends Iterable<Map.Entry<String, PairEndInfo>> {
         void put(int mateReferenceIndex, String key, PairEndInfo value);
+
         PairEndInfo remove(int mateReferenceIndex, String key);
+
         CloseableIterator<Map.Entry<String, PairEndInfo>> iterator();
     }
 
@@ -624,7 +652,7 @@ public class SamFileValidator {
         }
 
         public PairEndInfo remove(int mateReferenceIndex, String key) {
-            return onDiskMap.remove(mateReferenceIndex, key); 
+            return onDiskMap.remove(mateReferenceIndex, key);
         }
 
         public CloseableIterator<Map.Entry<String, PairEndInfo>> iterator() {
@@ -635,8 +663,13 @@ public class SamFileValidator {
             private DataInputStream in;
             private DataOutputStream out;
 
-            public void setOutputStream(final OutputStream os) { this.out = new DataOutputStream(os); }
-            public void setInputStream(final InputStream is) { this.in = new DataInputStream(is); }
+            public void setOutputStream(final OutputStream os) {
+                this.out = new DataOutputStream(os);
+            }
+
+            public void setInputStream(final InputStream is) {
+                this.in = new DataInputStream(is);
+            }
 
             public void encode(String key, PairEndInfo record) {
                 try {
diff --git a/src/java/net/sf/picard/util/IlluminaUtil.java b/src/java/net/sf/picard/util/IlluminaUtil.java
index 5e5961a..6736457 100644
--- a/src/java/net/sf/picard/util/IlluminaUtil.java
+++ b/src/java/net/sf/picard/util/IlluminaUtil.java
@@ -158,7 +158,11 @@ public class IlluminaUtil {
                      "AGATCGGAAGAGCACACGTCTGAACTCCAGTCACNNNNNNNNATCTCGTATGCCGTCTTCTGCTTG"),
 
         FLUIDIGM("AATGATACGGCGACCACCGAGATCTACACTGACGACATGGTTCTACA",
-                 "AGACCAAGTCTCTGCTACCGTANNNNNNNNNNATCTCGTATGCCGTCTTCTGCTTG");
+                 "AGACCAAGTCTCTGCTACCGTANNNNNNNNNNATCTCGTATGCCGTCTTCTGCTTG"),
+
+        TRUSEQ_SMALLRNA("AATGATACGGCGACCACCGAGATCTACACGTTCAGAGTTCTACAGTCCGACGATC",
+                        "TGGAATTCTCGGGTGCCAAGGAACTCCAGTCACNNNNNNATCTCGTATGCCGTCTTCTGCTTG")
+        ;
 
         final String fivePrime, threePrime, fivePrimeReadOrder;
         final byte[]  fivePrimeBytes, threePrimeBytes, fivePrimeReadOrderBytes;
diff --git a/src/java/net/sf/picard/util/QualityEncodingDetector.java b/src/java/net/sf/picard/util/QualityEncodingDetector.java
new file mode 100644
index 0000000..def47fe
--- /dev/null
+++ b/src/java/net/sf/picard/util/QualityEncodingDetector.java
@@ -0,0 +1,337 @@
+package net.sf.picard.util;
+
+import net.sf.picard.PicardException;
+import net.sf.picard.fastq.FastqReader;
+import net.sf.picard.fastq.FastqRecord;
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.SAMRecordIterator;
+
+import java.util.*;
+
+import static java.util.Arrays.asList;
+
+/**
+ * Utility for determining the type of quality encoding/format (see FastqQualityFormat) used in a SAM/BAM or Fastq.
+ * <p/>
+ * To use this class, invoke the detect() method with a SAMFileReader or FastqReader, as appropriate.  The consumer is
+ * responsible for closing readers.
+ *
+ * @author mccowan at broadinstitute.org
+ */
+public class QualityEncodingDetector {
+
+    private QualityRecordAggregator qualityAggregator = new QualityRecordAggregator();
+
+    /**
+     * The maximum number of records over which the detector will iterate before making a determination, by default.
+     */
+    public static final long DEFAULT_MAX_RECORDS_TO_ITERATE = 10000;
+    private static final Log log = Log.getInstance(QualityEncodingDetector.class);
+
+    public enum FileContext {FASTQ, SAM}
+
+    static class Range {
+        final int low, high;
+
+        Range(final int low, final int high) {
+            this.low = low;
+            this.high = high;
+        }
+
+        boolean contains(final int value) {
+            return value <= high && value >= low;
+        }
+    }
+
+    /**
+     * Collection of data about the different quality formats and how they are interpreted.
+     */
+    enum QualityScheme {
+        Phred(
+                new Range(0, 93),           // Raw value range
+                new Range(33, 126),         // ASCII value range
+                asList(new Range(33, 58)),  // Ranges into which we expect at least one ASCII value to fall
+                FastqQualityFormat.Standard // Associated quality format
+        ),
+        Solexa(
+                new Range(-5, 62),
+                new Range(59, 126),
+                new ArrayList<Range>(),
+                FastqQualityFormat.Solexa
+        ),
+        Illumina(
+                new Range(0, 62),
+                new Range(64, 126),
+                new ArrayList<Range>(),
+                FastqQualityFormat.Illumina
+        );
+        final Range rawRange, asciiRange;
+        /**
+         * Ranges into which we expect at least one value to fall if this formatting is being used.  For example, for
+         * Standard encoding, we expect to at least one ASCII value between 33 and 58 (0 and 25); otherwise, it's
+         * probably not Standard-encoded.
+         */
+        final List<Range> expectedAsciiRanges;
+        final FastqQualityFormat qualityFormat;
+
+        QualityScheme(final Range rawRange, final Range asciiRange, final List<Range> expectedAsciiRanges, final FastqQualityFormat qualityFormat) {
+            this.rawRange = rawRange;
+            this.asciiRange = asciiRange;
+            this.expectedAsciiRanges = expectedAsciiRanges;
+            this.qualityFormat = qualityFormat;
+        }
+    }
+
+    /**
+     * Collecting reads and their quality scores for later analysis.  Uses ASCII values since those are the inherently
+     * "raw-est", read unmodified from the file.
+     */
+    private static class QualityRecordAggregator {
+        private Set<Integer> observedAsciiQualities = new HashSet<Integer>();
+
+        public Set<Integer> getObservedAsciiQualities() {
+            return Collections.unmodifiableSet(observedAsciiQualities);
+        }
+
+        /**
+         * Adds the FastqRecord's quality scores.
+         */
+        public void add(final FastqRecord fastqRecord) {
+            addAsciiQuality(fastqRecord.getBaseQualityString().getBytes());
+        }
+
+        /**
+         * Adds the SAMRecord's quality scores.
+         * <p/>
+         * Does not assume Phred quality encoding (for obvious reasons); getBaseQualityString() is used to read the
+         * unmodified ASCII score.  To elaborate, SAMFileReader, which is generating these SAMRecords, builds the
+         * SAMRecord by subtracting a value from each quality score and storing that transformed value internally.
+         * Since we desire original scores here (whatever was in the file to begin with), we effectively undo this
+         * transformation by asking SAMRecord to convert the quality back into the ASCII that was read in the file.
+         */
+        public void add(final SAMRecord samRecord) {
+            addAsciiQuality(samRecord.getBaseQualityString().getBytes());
+        }
+
+        private void addAsciiQuality(final byte... asciiQualities) {
+            for (final byte asciiQuality : asciiQualities) {
+                observedAsciiQualities.add((int) asciiQuality);
+            }
+        }
+    }
+
+    /**
+     * Adds the provided reader's records to the detector.
+     * @return The number of records read
+     */
+    public long add(final long maxRecords, final FastqReader... readers) {
+        final Iterator<FastqRecord> iterator = generateInterleavedFastqIterator(readers);
+        long recordCount = 0;
+        while (iterator.hasNext() && recordCount++ != maxRecords) {
+            this.add(iterator.next());
+        }
+        log.debug(String.format("Read %s records from %s.", recordCount, Arrays.toString(readers)));
+        return recordCount;
+    }
+
+    /**
+     * Adds the provided reader's records to the detector.
+     * @return The number of records read
+     */
+    public long add(final long maxRecords, final SAMFileReader reader) {
+        final SAMRecordIterator iterator = reader.iterator();
+        long recordCount = 0;
+        try {
+            while (iterator.hasNext() && recordCount++ != maxRecords) {
+                this.add(iterator.next());
+            }
+            
+            return recordCount;
+        } finally {
+            iterator.close();
+        }
+    }
+
+    /**
+     * Adds the provided record's qualities to the detector.
+     */
+    public void add(final FastqRecord fastqRecord) {
+        this.qualityAggregator.add(fastqRecord);
+    }
+
+    /**
+     * Adds the provided record's qualities to the detector.
+     */
+    public void add(final SAMRecord samRecord) {
+        this.qualityAggregator.add(samRecord);
+    }
+
+    /**
+     * Tests whether or not the detector can make a determination without guessing (i.e., if all but one quality format
+     * can be excluded using established exclusion conventions).
+     *
+     * @return True if more than one format is possible after exclusions; false otherwise
+     */
+    public boolean isDeterminationAmbiguous() {
+        return this.generateCandidateQualities().size() > 1;
+    }
+
+    /**
+     * Processes collected quality data and applies rules to determine which quality formats are possible.
+     * <p/>
+     * Specifically, for each format's known range of possible values (its "quality scheme"), exclude formats if any
+     * observed values fall outside of that range.  Additionally, exclude formats for which we expect to see at
+     * least one quality in a range of values, but do not.  (For example, for Phred, we expect to eventually see
+     * a value below 58.  If we never see such a value, we exclude Phred as a possible format.)
+     */
+    public EnumSet<FastqQualityFormat> generateCandidateQualities() {
+        final EnumSet<FastqQualityFormat> candidateFormats = EnumSet.allOf(FastqQualityFormat.class);
+        final Set<Integer> observedAsciiQualities = this.qualityAggregator.getObservedAsciiQualities();
+        if (observedAsciiQualities.isEmpty())
+            throw new PicardException("Cannot determine candidate qualities: no qualities found.");
+
+        for (final QualityScheme scheme : QualityScheme.values()) {
+            final Iterator<Integer> qualityBinIterator = observedAsciiQualities.iterator();
+            final Collection<Range> remainingExpectedValueRanges = new ArrayList<Range>(scheme.expectedAsciiRanges);
+            while (qualityBinIterator.hasNext()) {
+                final int quality = qualityBinIterator.next();
+                if (!scheme.asciiRange.contains(quality)) {
+                    candidateFormats.remove(scheme.qualityFormat);
+                }
+
+                final Iterator<Range> expectedValueRangeIterator = remainingExpectedValueRanges.iterator();
+                while (expectedValueRangeIterator.hasNext()) {
+                    if (expectedValueRangeIterator.next().contains(quality)) {
+                        expectedValueRangeIterator.remove();
+                    }
+                }
+            }
+
+            /**
+             * We remove elements from this list as we observe values in the corresponding range; if the list isn't 
+             * empty, we haven't seen a value in that range.  In other words, we haven't seen a value we expected.
+             * Consequently, we remove the corresponding format from the running possibilities.
+             */
+            if (!remainingExpectedValueRanges.isEmpty()) {
+                candidateFormats.remove(scheme.qualityFormat);
+            }
+        }
+
+        return candidateFormats;
+    }
+
+    /**
+     * Based on the quality scores accumulated in the detector as well as the context in which this guess applies (a BAM
+     * or fastq), determines the best guess as to the quality format.
+     * <p/>
+     * This method does not exclude any quality formats based on observed quality values; even if there remain multiple
+     * candidate qualities (see generateCandidateQualities()), picks a single one based on a high-level logic.
+     *
+     * @param context The type of file for which the guess is being made, Fastq or Bam
+     */
+    public FastqQualityFormat generateBestGuess(final FileContext context) {
+        final EnumSet<FastqQualityFormat> possibleFormats = this.generateCandidateQualities();
+        switch (possibleFormats.size()) {
+            case 1:
+                return possibleFormats.iterator().next();
+            case 2:
+                if (possibleFormats.equals(EnumSet.of(FastqQualityFormat.Illumina, FastqQualityFormat.Solexa))) {
+                    return FastqQualityFormat.Illumina;
+                } else if (possibleFormats.equals(EnumSet.of(FastqQualityFormat.Illumina, FastqQualityFormat.Standard))) {
+                    switch (context) {
+                        case FASTQ:
+                            return FastqQualityFormat.Illumina;
+                        case SAM:
+                            return FastqQualityFormat.Standard;
+                    }
+                } else if (possibleFormats.equals(EnumSet.of(FastqQualityFormat.Standard, FastqQualityFormat.Solexa))) {
+                    throw new PicardException("The quality format cannot be determined: both Phred and Solexa formats are possible; this application's logic does not handle this scenario.");
+                } else throw new PicardException("Unreachable code.");
+            case 3:
+                throw new PicardException("The quality format cannot be determined: no formats were excluded.");
+            case 0:
+                throw new PicardException("The quality format cannot be determined: all formats were excluded.");
+            default:
+                throw new PicardException("Unreachable code.");
+        }
+    }
+
+    /**
+     * Interleaves FastqReader iterators so that serial-iteration of the result cycles between the constituent iterators.
+     */
+    private static Iterator<FastqRecord> generateInterleavedFastqIterator(final FastqReader... readers) {
+        return new Iterator<FastqRecord>() {
+            private Queue<Iterator<FastqRecord>> queue = new LinkedList<Iterator<FastqRecord>>();
+
+            {
+                for (final FastqReader reader : readers) {
+                    queue.add(reader.iterator());
+                }
+            }
+
+            public boolean hasNext() {
+                // If this returns true, the head of the queue will have a next element
+                while (!queue.isEmpty()) {
+                    if (queue.peek().hasNext()) {
+                        return true;
+                    }
+                    queue.poll();
+                }
+                return false;
+            }
+
+            public FastqRecord next() {
+                if (!hasNext()) throw new NoSuchElementException();
+                final Iterator<FastqRecord> i = queue.poll();
+                final FastqRecord result = i.next();
+                queue.offer(i);
+                return result;
+            }
+
+            public void remove() {
+                throw new UnsupportedOperationException();
+            }
+        };
+    }
+
+    /**
+     * Reads through the records in the provided fastq reader and uses their quality scores to determine the quality
+     * format used in the fastq.
+     *
+     * @param readers    The fastq readers from which qualities are to be read; at least one must be provided
+     * @param maxRecords The maximum number of records to read from the reader before making a determination (a guess,
+     *                   so more records is better)
+     * @return The determined quality format
+     */
+    public static FastqQualityFormat detect(final long maxRecords, final FastqReader... readers) {
+        final QualityEncodingDetector detector = new QualityEncodingDetector();
+        final long recordCount = detector.add(maxRecords, readers);
+        log.debug(String.format("Read %s records from %s.", recordCount, Arrays.toString(readers)));
+        return detector.generateBestGuess(FileContext.FASTQ);
+    }
+
+    public static FastqQualityFormat detect(final FastqReader... readers) {
+        return detect(DEFAULT_MAX_RECORDS_TO_ITERATE, readers);
+    }
+
+    /**
+     * Reads through the records in the provided SAM reader and uses their quality scores to determine the quality
+     * format used in the SAM.
+     *
+     * @param reader     The SAM reader from which records are to be read
+     * @param maxRecords The maximum number of records to read from the reader before making a determination (a guess,
+     *                   so more records is better)
+     * @return The determined quality format
+     */
+    public static FastqQualityFormat detect(final long maxRecords, final SAMFileReader reader) {
+        final QualityEncodingDetector detector = new QualityEncodingDetector();
+        final long recordCount = detector.add(maxRecords, reader);
+        log.debug(String.format("Read %s records from %s.", recordCount, reader));
+        return detector.generateBestGuess(FileContext.SAM);
+    }
+
+    public static FastqQualityFormat detect(final SAMFileReader reader) {
+        return detect(DEFAULT_MAX_RECORDS_TO_ITERATE, reader);
+    }
+}
\ No newline at end of file
diff --git a/src/java/net/sf/samtools/SAMFileTruncatedReader.java b/src/java/net/sf/samtools/SAMFileTruncatedReader.java
new file mode 100644
index 0000000..7b2c1bd
--- /dev/null
+++ b/src/java/net/sf/samtools/SAMFileTruncatedReader.java
@@ -0,0 +1,68 @@
+package net.sf.samtools;
+
+import java.io.File;
+import java.util.NoSuchElementException;
+
+/**
+ * A truncated form of a SAMFileReader that iterates over a limited number of records.
+ *
+ * @author mccowan at broadinstitute.org
+ */
+public class SAMFileTruncatedReader extends SAMFileReader {
+    private class TruncatedIterator implements SAMRecordIterator {
+        final SAMRecordIterator i;
+        final long max;
+        long currentRecord = 0;
+
+        TruncatedIterator(final SAMRecordIterator i, final long max) {
+            this.i = i;
+            this.max = max;
+        }
+
+        public boolean hasNext() {
+            return i.hasNext() && max != currentRecord;
+        }
+
+        public SAMRecord next() {
+            if (this.hasNext()) {
+                currentRecord += 1;
+                return i.next();
+            } else {
+                throw new NoSuchElementException();
+            }
+        }
+
+        public void remove() {
+            i.remove();
+        }
+
+        public void close() {
+            i.close();
+        }
+
+        public SAMRecordIterator assertSorted(final SAMFileHeader.SortOrder sortOrder) {
+            return i.assertSorted(sortOrder);
+        }
+    }
+
+    private final long maxRecordsToIterate;
+    
+    /**
+     * @param input The SAM file
+     * @param max   The maximum number of records to read from the file via iterator() methods
+     */
+    public SAMFileTruncatedReader(final File input, final long max) {
+        super(input);
+        this.maxRecordsToIterate = max;
+    }
+    
+    @Override
+    public SAMRecordIterator iterator() {
+        return new TruncatedIterator(super.iterator(), maxRecordsToIterate);
+    }
+
+    @Override
+    public SAMRecordIterator iterator(final SAMFileSpan chunks) {
+        return new TruncatedIterator(super.iterator(chunks), maxRecordsToIterate);
+    }
+}
diff --git a/src/java/net/sf/samtools/SAMFileWriterFactory.java b/src/java/net/sf/samtools/SAMFileWriterFactory.java
index 3788bec..5b3e675 100644
--- a/src/java/net/sf/samtools/SAMFileWriterFactory.java
+++ b/src/java/net/sf/samtools/SAMFileWriterFactory.java
@@ -172,13 +172,13 @@ public class SAMFileWriterFactory {
 
     private void initializeBAMWriter(final BAMFileWriter writer, final SAMFileHeader header, final boolean presorted, final boolean createIndex) {
         writer.setSortOrder(header.getSortOrder(), presorted);
+        if (maxRecordsInRam != null) {
+            writer.setMaxRecordsInRam(maxRecordsInRam);
+        }
         writer.setHeader(header);
         if (createIndex && writer.getSortOrder().equals(SAMFileHeader.SortOrder.coordinate)){
             writer.enableBamIndexConstruction();
         }
-        if (maxRecordsInRam != null) {
-            writer.setMaxRecordsInRam(maxRecordsInRam);
-        }
     }
 
     /**
diff --git a/src/java/net/sf/samtools/SAMLineParser.java b/src/java/net/sf/samtools/SAMLineParser.java
new file mode 100644
index 0000000..91b963e
--- /dev/null
+++ b/src/java/net/sf/samtools/SAMLineParser.java
@@ -0,0 +1,457 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2012 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package net.sf.samtools;
+
+import java.io.File;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+import net.sf.samtools.util.StringUtil;
+
+/**
+ * this class enables creation of a SAMRecord object from a String in SAM text format.
+ */
+public class SAMLineParser {
+
+    // From SAM specification
+    private static final int QNAME_COL = 0;
+    private static final int FLAG_COL = 1;
+    private static final int RNAME_COL = 2;
+    private static final int POS_COL = 3;
+    private static final int MAPQ_COL = 4;
+    private static final int CIGAR_COL = 5;
+    private static final int MRNM_COL = 6;
+    private static final int MPOS_COL = 7;
+    private static final int ISIZE_COL = 8;
+    private static final int SEQ_COL = 9;
+    private static final int QUAL_COL = 10;
+
+    private static final int NUM_REQUIRED_FIELDS = 11;
+
+    // Read string must contain only these characters
+    private static final Pattern VALID_BASES = Pattern
+            .compile("^[acmgrsvtwyhkdbnACMGRSVTWYHKDBN.=]+$");
+
+    /**
+     * Allocate this once rather than for every line as a performance
+     * optimization. The size is arbitrary -- merely large enough to handle the
+     * maximum number of fields we might expect from a reasonable SAM file.
+     */
+    private final String[] mFields = new String[10000];
+
+    /**
+     * Add information about the origin (reader and position) to SAM records.
+     */
+    private final SAMFileReader mParentReader;
+    private final SAMRecordFactory samRecordFactory;
+    private final SAMFileReader.ValidationStringency validationStringency;
+    private final SAMFileHeader mFileHeader;
+    private final File mFile;
+
+    private final TextTagCodec tagCodec = new TextTagCodec();
+
+    private int currentLineNumber;
+    private String currentLine;
+
+    //
+    // Constructors
+    //
+
+    /**
+     * Public constructor. Use the default SAMRecordFactory and stringency.
+     * @param samFileHeader SAM file header
+     */
+    public SAMLineParser(final SAMFileHeader samFileHeader) {
+
+        this(new DefaultSAMRecordFactory(),
+                SAMFileReader.ValidationStringency.DEFAULT_STRINGENCY, samFileHeader,
+                null, null);
+    }
+
+    /**
+     * Public constructor. Use the default SAMRecordFactory and stringency.
+     * @param samFileHeader SAM file header
+     * @param samFileReader SAM file reader For passing to SAMRecord.setFileSource, may be null.
+     * @param samFile SAM file being read (for error message only, may be null)
+     */
+    public SAMLineParser(final SAMFileHeader samFileHeader,
+                         final SAMFileReader samFileReader, final File samFile) {
+
+        this(new DefaultSAMRecordFactory(),
+                SAMFileReader.ValidationStringency.DEFAULT_STRINGENCY, samFileHeader,
+                samFileReader, samFile);
+    }
+
+    /**
+     * Public constructor.
+     * @param samRecordFactory SamRecord Factory
+     * @param validationStringency validation stringency
+     * @param samFileHeader SAM file header
+     * @param samFileReader SAM file reader For passing to SAMRecord.setFileSource, may be null.
+     * @param samFile SAM file being read (for error message only, may be null)
+     */
+    public SAMLineParser(final SAMRecordFactory samRecordFactory,
+                         final SAMFileReader.ValidationStringency validationStringency,
+                         final SAMFileHeader samFileHeader, final SAMFileReader samFileReader,
+                         final File samFile) {
+
+        if (samRecordFactory == null)
+            throw new NullPointerException("The SamRecordFactory must be set");
+
+        if (validationStringency == null)
+            throw new NullPointerException("The validationStringency must be set");
+
+        if (samFileHeader == null)
+            throw new NullPointerException("The mFileHeader must be set");
+
+        this.samRecordFactory = samRecordFactory;
+        this.validationStringency = validationStringency;
+        this.mFileHeader = samFileHeader;
+
+        // Can be null
+        this.mParentReader = samFileReader;
+
+        // Can be null
+        this.mFile = samFile;
+    }
+
+    /**
+     * Get the File header.
+     * @return the SAM file header
+     */
+    private SAMFileHeader getFileHeader() {
+
+        return this.mFileHeader;
+    }
+
+    /**
+     * Get validation stringency.
+     * @return validation stringency
+     */
+    private SAMFileReader.ValidationStringency getValidationStringency() {
+
+        return this.validationStringency;
+    }
+
+    private int parseInt(final String s, final String fieldName) {
+        final int ret;
+        try {
+            ret = Integer.parseInt(s);
+        } catch (NumberFormatException e) {
+            throw reportFatalErrorParsingLine("Non-numeric value in "
+                    + fieldName + " column");
+        }
+        return ret;
+    }
+
+    private void validateReferenceName(final String rname, final String fieldName) {
+        if (rname.equals("=")) {
+            if (fieldName.equals("MRNM")) {
+                return;
+            }
+            reportErrorParsingLine("= is not a valid value for "
+                    + fieldName + " field.");
+        }
+        if (this.mFileHeader.getSequenceDictionary().size() != 0) {
+            if (this.mFileHeader.getSequence(rname) == null) {
+                reportErrorParsingLine(fieldName
+                        + " '" + rname + "' not found in any SQ record");
+            }
+        }
+    }
+
+    /**
+     * Parse a SAM line.
+     * @param line line to parse
+     * @return a new SAMRecord object
+     */
+    public SAMRecord parseLine(final String line) {
+
+        return parseLine(line, -1);
+    }
+
+    /**
+     * Parse a SAM line.
+     * @param line line to parse
+     * @param lineNumber line number in the file. If the line number is not known
+     *          can be <=0.
+     * @return a new SAMRecord object
+     */
+    public SAMRecord parseLine(final String line, final int lineNumber) {
+
+        final String mCurrentLine = line;
+        this.currentLineNumber = lineNumber;
+        this.currentLine = line;
+
+        final int numFields = StringUtil.split(mCurrentLine, mFields, '\t');
+        if (numFields < NUM_REQUIRED_FIELDS) {
+            throw reportFatalErrorParsingLine("Not enough fields");
+        }
+        if (numFields == mFields.length) {
+            reportErrorParsingLine("Too many fields in SAM text record.");
+        }
+        for (int i = 0; i < numFields; ++i) {
+            if (mFields[i].length() == 0) {
+                reportErrorParsingLine("Empty field at position " + i + " (zero-based)");
+            }
+        }
+        final SAMRecord samRecord =
+                samRecordFactory.createSAMRecord(this.mFileHeader);
+        samRecord.setValidationStringency(this.validationStringency);
+        if (mParentReader != null)
+            samRecord.setFileSource(new SAMFileSource(mParentReader, null));
+        samRecord.setHeader(this.mFileHeader);
+        samRecord.setReadName(mFields[QNAME_COL]);
+
+        final int flags = parseInt(mFields[FLAG_COL], "FLAG");
+        samRecord.setFlags(flags);
+
+        String rname = mFields[RNAME_COL];
+        if (!rname.equals("*")) {
+            rname = SAMSequenceRecord.truncateSequenceName(rname);
+            validateReferenceName(rname, "RNAME");
+            samRecord.setReferenceName(rname);
+        } else if (!samRecord.getReadUnmappedFlag()) {
+            reportErrorParsingLine("RNAME is not specified but flags indicate mapped");
+        }
+
+        final int pos = parseInt(mFields[POS_COL], "POS");
+        final int mapq = parseInt(mFields[MAPQ_COL], "MAPQ");
+        final String cigar = mFields[CIGAR_COL];
+        if (!SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(samRecord
+                .getReferenceName())) {
+            if (pos == 0) {
+                reportErrorParsingLine("POS must be non-zero if RNAME is specified");
+            }
+            if (!samRecord.getReadUnmappedFlag() && cigar.equals("*")) {
+                reportErrorParsingLine("CIGAR must not be '*' if RNAME is specified");
+            }
+        } else {
+            if (pos != 0) {
+                reportErrorParsingLine("POS must be zero if RNAME is not specified");
+            }
+            if (mapq != 0) {
+                reportErrorParsingLine("MAPQ must be zero if RNAME is not specified");
+            }
+            if (!cigar.equals("*")) {
+                reportErrorParsingLine("CIGAR must be '*' if RNAME is not specified");
+            }
+        }
+        samRecord.setAlignmentStart(pos);
+        samRecord.setMappingQuality(mapq);
+        samRecord.setCigarString(cigar);
+
+        String mateRName = mFields[MRNM_COL];
+        if (mateRName.equals("*")) {
+            if (samRecord.getReadPairedFlag() && !samRecord.getMateUnmappedFlag()) {
+                reportErrorParsingLine("MRNM not specified but flags indicate mate mapped");
+            }
+        } else {
+            if (!samRecord.getReadPairedFlag()) {
+                reportErrorParsingLine("MRNM specified but flags indicate unpaired");
+            }
+            if (!"=".equals(mateRName)) {
+                mateRName = SAMSequenceRecord.truncateSequenceName(mateRName);
+            }
+            validateReferenceName(mateRName, "MRNM");
+            if (mateRName.equals("=")) {
+                if (samRecord.getReferenceName() == null) {
+                    reportErrorParsingLine("MRNM is '=', but RNAME is not set");
+                }
+                samRecord.setMateReferenceName(samRecord.getReferenceName());
+            } else {
+                samRecord.setMateReferenceName(mateRName);
+            }
+        }
+
+        final int matePos = parseInt(mFields[MPOS_COL], "MPOS");
+        final int isize = parseInt(mFields[ISIZE_COL], "ISIZE");
+        if (!samRecord.getMateReferenceName().equals(
+                SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) {
+            if (matePos == 0) {
+                reportErrorParsingLine("MPOS must be non-zero if MRNM is specified");
+            }
+        } else {
+            if (matePos != 0) {
+                reportErrorParsingLine("MPOS must be zero if MRNM is not specified");
+            }
+            if (isize != 0) {
+                reportErrorParsingLine("ISIZE must be zero if MRNM is not specified");
+            }
+        }
+        samRecord.setMateAlignmentStart(matePos);
+        samRecord.setInferredInsertSize(isize);
+        if (!mFields[SEQ_COL].equals("*")) {
+            validateReadBases(mFields[SEQ_COL]);
+            samRecord.setReadString(mFields[SEQ_COL]);
+        } else {
+            samRecord.setReadBases(SAMRecord.NULL_SEQUENCE);
+        }
+        if (!mFields[QUAL_COL].equals("*")) {
+            if (samRecord.getReadBases() == SAMRecord.NULL_SEQUENCE) {
+                reportErrorParsingLine("QUAL should not be specified if SEQ is not specified");
+            }
+            if (samRecord.getReadString().length() != mFields[QUAL_COL].length()) {
+                reportErrorParsingLine("length(QUAL) != length(SEQ)");
+            }
+            samRecord.setBaseQualityString(mFields[QUAL_COL]);
+        } else {
+            samRecord.setBaseQualities(SAMRecord.NULL_QUALS);
+        }
+
+        for (int i = NUM_REQUIRED_FIELDS; i < numFields; ++i) {
+            parseTag(samRecord, mFields[i]);
+        }
+
+        final List<SAMValidationError> validationErrors = samRecord.isValid();
+        if (validationErrors != null) {
+            for (final SAMValidationError errorMessage : validationErrors) {
+                reportErrorParsingLine(errorMessage.getMessage());
+            }
+        }
+        return samRecord;
+    }
+
+    private void validateReadBases(final String bases) {
+        /*
+        * Using regex is slow, so check for invalid characters via
+        * isValidReadBase(), which hopefully the JIT will optimize. if
+        * (!VALID_BASES.matcher(bases).matches()) {
+        * reportErrorParsingLine("Invalid character in read bases"); }
+        */
+        for (int i = 0; i < bases.length(); ++i) {
+            if (!isValidReadBase(bases.charAt(i))) {
+                reportErrorParsingLine("Invalid character in read bases");
+                return;
+            }
+        }
+    }
+
+    private boolean isValidReadBase(final char base) {
+        switch (base) {
+            case 'a':
+            case 'c':
+            case 'm':
+            case 'g':
+            case 'r':
+            case 's':
+            case 'v':
+            case 't':
+            case 'w':
+            case 'y':
+            case 'h':
+            case 'k':
+            case 'd':
+            case 'b':
+            case 'n':
+            case 'A':
+            case 'C':
+            case 'M':
+            case 'G':
+            case 'R':
+            case 'S':
+            case 'V':
+            case 'T':
+            case 'W':
+            case 'Y':
+            case 'H':
+            case 'K':
+            case 'D':
+            case 'B':
+            case 'N':
+            case '.':
+            case '=':
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    private void parseTag(final SAMRecord samRecord, final String tag) {
+        Map.Entry<String, Object> entry = null;
+        try {
+            entry = tagCodec.decode(tag);
+        } catch (SAMFormatException e) {
+            reportErrorParsingLine(e);
+        }
+        if (entry != null) {
+            if (entry.getValue() instanceof TagValueAndUnsignedArrayFlag) {
+                final TagValueAndUnsignedArrayFlag valueAndFlag =
+                        (TagValueAndUnsignedArrayFlag) entry.getValue();
+                if (valueAndFlag.isUnsignedArray) {
+                    samRecord.setUnsignedArrayAttribute(entry.getKey(),
+                            valueAndFlag.value);
+                } else {
+                    samRecord.setAttribute(entry.getKey(), valueAndFlag.value);
+                }
+            } else {
+                samRecord.setAttribute(entry.getKey(), entry.getValue());
+            }
+        }
+    }
+
+    //
+    // Error methods
+    //
+
+    private RuntimeException reportFatalErrorParsingLine(final String reason) {
+        return new SAMFormatException(makeErrorString(reason));
+    }
+
+    private void reportErrorParsingLine(final String reason) {
+        final String errorMessage = makeErrorString(reason);
+
+        if (validationStringency == SAMFileReader.ValidationStringency.STRICT) {
+            throw new SAMFormatException(errorMessage);
+        } else if (validationStringency == SAMFileReader.ValidationStringency.LENIENT) {
+            System.err
+                    .println("Ignoring SAM validation error due to lenient parsing:");
+            System.err.println(errorMessage);
+        }
+    }
+
+    private void reportErrorParsingLine(final Exception e) {
+        final String errorMessage = makeErrorString(e.getMessage());
+        if (validationStringency == SAMFileReader.ValidationStringency.STRICT) {
+            throw new SAMFormatException(errorMessage);
+        } else if (validationStringency == SAMFileReader.ValidationStringency.LENIENT) {
+            System.err
+                    .println("Ignoring SAM validation error due to lenient parsing:");
+            System.err.println(errorMessage);
+        }
+    }
+
+    private String makeErrorString(final String reason) {
+        String fileMessage = "";
+        if (mFile != null) {
+            fileMessage = "File " + mFile + "; ";
+        }
+        return "Error parsing text SAM file. "
+                + reason + "; " + fileMessage + "Line "
+                + (this.currentLineNumber <= 0 ? "unknown" : this.currentLineNumber)
+                + "\nLine: " + this.currentLine;
+    }
+
+}
diff --git a/src/java/net/sf/samtools/SAMTextHeaderCodec.java b/src/java/net/sf/samtools/SAMTextHeaderCodec.java
index 530c637..f7f4bab 100644
--- a/src/java/net/sf/samtools/SAMTextHeaderCodec.java
+++ b/src/java/net/sf/samtools/SAMTextHeaderCodec.java
@@ -32,6 +32,7 @@ import java.io.BufferedWriter;
 import java.io.IOException;
 import java.io.Writer;
 import java.util.*;
+import java.util.regex.Pattern;
 
 /**
  * Parser for a SAM text header, and a generator of SAM text header.
@@ -59,7 +60,10 @@ public class SAMTextHeaderCodec {
     private BufferedWriter writer;
 
     private static final String TAG_KEY_VALUE_SEPARATOR = ":";
+    private static final char TAG_KEY_VALUE_SEPARATOR_CHAR = ':';
     private static final String FIELD_SEPARATOR = "\t";
+    private static final char FIELD_SEPARATOR_CHAR = '\t';
+    private static final Pattern FIELD_SEPARATOR_RE = Pattern.compile(FIELD_SEPARATOR);
 
     public static final String COMMENT_PREFIX = HEADER_LINE_START + HeaderRecordType.CO.name() + FIELD_SEPARATOR;
 
@@ -250,7 +254,13 @@ public class SAMTextHeaderCodec {
             assert(line.startsWith(HEADER_LINE_START));
 
             // Tab-separate
-            final String[] fields = line.split(FIELD_SEPARATOR);
+            String[] fields = new String[1024];
+            int numFields = StringUtil.split(line, fields, FIELD_SEPARATOR_CHAR);
+            if (numFields == fields.length) {
+                // Lots of fields, so fall back
+                fields = FIELD_SEPARATOR_RE.split(line);
+                numFields = fields.length;
+            }
 
             // Parse the HeaderRecordType
             try {
@@ -267,10 +277,10 @@ public class SAMTextHeaderCodec {
                 return;
             }
 
+            final String[] keyAndValue = new String[2];
             // Parse they key:value pairs
-            for (int i = 1; i < fields.length; ++i) {
-                final String[] keyAndValue = fields[i].split(TAG_KEY_VALUE_SEPARATOR, 2);
-                if (keyAndValue.length != 2) {
+            for (int i = 1; i < numFields; ++i) {
+                if (StringUtil.splitConcatenateExcessTokens(fields[i], keyAndValue, TAG_KEY_VALUE_SEPARATOR_CHAR) != 2) {
                     reportErrorParsingLine("Problem parsing " + HEADER_LINE_START + mHeaderRecordType +
                             " key:value pair", SAMValidationError.Type.POORLY_FORMATTED_HEADER_TAG, null);
                     continue;
diff --git a/src/java/net/sf/samtools/SAMTextReader.java b/src/java/net/sf/samtools/SAMTextReader.java
index 5e1c664..adef9d8 100644
--- a/src/java/net/sf/samtools/SAMTextReader.java
+++ b/src/java/net/sf/samtools/SAMTextReader.java
@@ -26,35 +26,16 @@ package net.sf.samtools;
 
 import net.sf.samtools.util.BufferedLineReader;
 import net.sf.samtools.util.CloseableIterator;
-import net.sf.samtools.util.StringUtil;
 
 import java.io.File;
 import java.io.InputStream;
-import java.util.Map;
-import java.util.List;
-import java.util.regex.Pattern;
+
 
 /**
  * Internal class for reading SAM text files.
  */
 class SAMTextReader extends SAMFileReader.ReaderImplementation {
-    // From SAM specification
-    private static final int QNAME_COL = 0;
-    private static final int FLAG_COL = 1;
-    private static final int RNAME_COL = 2;
-    private static final int POS_COL = 3;
-    private static final int MAPQ_COL = 4;
-    private static final int CIGAR_COL = 5;
-    private static final int MRNM_COL = 6;
-    private static final int MPOS_COL = 7;
-    private static final int ISIZE_COL = 8;
-    private static final int SEQ_COL = 9;
-    private static final int QUAL_COL = 10;
-
-    private static final int NUM_REQUIRED_FIELDS = 11;
-
-    // Read string must contain only these characters
-    private static final Pattern VALID_BASES = Pattern.compile("^[acmgrsvtwyhkdbnACMGRSVTWYHKDBN.=]+$");
+
 
     private SAMRecordFactory samRecordFactory;
     private BufferedLineReader mReader;
@@ -62,7 +43,7 @@ class SAMTextReader extends SAMFileReader.ReaderImplementation {
     private String mCurrentLine = null;
     private RecordIterator mIterator = null;
     private File mFile = null;
-    private final TextTagCodec tagCodec = new TextTagCodec();
+
     private SAMFileReader.ValidationStringency validationStringency = SAMFileReader.ValidationStringency.DEFAULT_STRINGENCY;
 
     /**
@@ -116,7 +97,7 @@ class SAMTextReader extends SAMFileReader.ReaderImplementation {
     }
 
     boolean hasIndex() {
-        return false;    
+        return false;
     }
 
     BAMIndex getIndex() {
@@ -210,51 +191,17 @@ class SAMTextReader extends SAMFileReader.ReaderImplementation {
         return mCurrentLine;
     }
 
-    private String makeErrorString(final String reason) {
-        String fileMessage = "";
-        if (mFile != null) {
-            fileMessage = "File " + mFile + "; ";
-        }
-        return "Error parsing text SAM file. " + reason + "; " + fileMessage +
-                "Line " + mReader.getLineNumber() + "\nLine: " + mCurrentLine;
-    }
 
-    private RuntimeException reportFatalErrorParsingLine(final String reason) {
-        return new SAMFormatException(makeErrorString(reason));
-    }
 
-    private void reportErrorParsingLine(final String reason) {
-        final String errorMessage = makeErrorString(reason);
 
-        if (validationStringency == SAMFileReader.ValidationStringency.STRICT) {
-            throw new SAMFormatException(errorMessage);
-        } else if (validationStringency == SAMFileReader.ValidationStringency.LENIENT) {
-            System.err.println("Ignoring SAM validation error due to lenient parsing:");
-            System.err.println(errorMessage);
-        }
-    }
-
-    private void reportErrorParsingLine(final Exception e) {
-        final String errorMessage = makeErrorString(e.getMessage());
-        if (validationStringency == SAMFileReader.ValidationStringency.STRICT) {
-            throw new SAMFormatException(errorMessage);
-        } else if (validationStringency == SAMFileReader.ValidationStringency.LENIENT) {
-            System.err.println("Ignoring SAM validation error due to lenient parsing:");
-            System.err.println(errorMessage);
-        }
-    }
 
     /**
      * SAMRecord iterator for SAMTextReader
      */
     private class RecordIterator implements CloseableIterator<SAMRecord> {
 
-        /**
-         * Allocate this once rather than for every line as a performance optimization.
-         * The size is arbitrary -- merely large enough to handle the maximum number
-         * of fields we might expect from a reasonable SAM file.
-         */
-        private final String[] mFields = new String[10000];
+        private final SAMLineParser parser = new SAMLineParser(samRecordFactory, validationStringency,
+                mFileHeader, mParentReader, mFile);
 
         private RecordIterator() {
             if (mReader == null) {
@@ -285,234 +232,11 @@ class SAMTextReader extends SAMFileReader.ReaderImplementation {
             throw new UnsupportedOperationException("Not supported: remove");
         }
 
-        int parseInt(final String s, final String fieldName) {
-            final int ret;
-            try {
-                ret = Integer.parseInt(s);
-            } catch (NumberFormatException e) {
-                throw reportFatalErrorParsingLine("Non-numeric value in " + fieldName + " column");
-            }
-            return ret;
-        }
-
-        void validateReferenceName(final String rname, final String fieldName) {
-            if (rname.equals("=")) {
-                if (fieldName.equals("MRNM")) {
-                    return;
-                }
-                reportErrorParsingLine("= is not a valid value for " + fieldName + " field.");
-            }
-            if (getFileHeader().getSequenceDictionary().size() != 0) {
-                if (getFileHeader().getSequence(rname) == null) {
-                    reportErrorParsingLine(fieldName + " '" + rname + "' not found in any SQ record");
-                }
-            }
-        }
-
         private SAMRecord parseLine() {
-            final int numFields = StringUtil.split(mCurrentLine, mFields, '\t');
-            if (numFields < NUM_REQUIRED_FIELDS) {
-                throw reportFatalErrorParsingLine("Not enough fields");
-            }
-            if (numFields == mFields.length) {
-                reportErrorParsingLine("Too many fields in SAM text record.");
-            }
-            for (int i = 0; i < numFields; ++i) {
-                if (mFields[i].length() == 0) {
-                    reportErrorParsingLine("Empty field at position " + i + " (zero-based)");
-                }
-            }
-            final SAMRecord samRecord = samRecordFactory.createSAMRecord(mFileHeader);
-            samRecord.setValidationStringency(getValidationStringency());
-            if(mParentReader != null)
-                samRecord.setFileSource(new SAMFileSource(mParentReader,null));
-            samRecord.setHeader(mFileHeader);
-            samRecord.setReadName(mFields[QNAME_COL]);
-
-            final int flags = parseInt(mFields[FLAG_COL], "FLAG");
-            samRecord.setFlags(flags);
-
-            String rname = mFields[RNAME_COL];
-            if (!rname.equals("*")) {
-                rname = SAMSequenceRecord.truncateSequenceName(rname);
-                validateReferenceName(rname, "RNAME");
-                samRecord.setReferenceName(rname);
-            } else if (!samRecord.getReadUnmappedFlag()) {
-                    reportErrorParsingLine("RNAME is not specified but flags indicate mapped");
-                }
-
-            final int pos = parseInt(mFields[POS_COL], "POS");
-            final int mapq = parseInt(mFields[MAPQ_COL], "MAPQ");
-            final String cigar = mFields[CIGAR_COL];
-            if (!SAMRecord.NO_ALIGNMENT_REFERENCE_NAME.equals(samRecord.getReferenceName())) {
-                if (pos == 0) {
-                    reportErrorParsingLine("POS must be non-zero if RNAME is specified");
-                }
-                if (!samRecord.getReadUnmappedFlag() && cigar.equals("*")) {
-                    reportErrorParsingLine("CIGAR must not be '*' if RNAME is specified");
-                }
-            } else {
-                if (pos != 0) {
-                    reportErrorParsingLine("POS must be zero if RNAME is not specified");
-                }
-                if (mapq != 0) {
-                    reportErrorParsingLine("MAPQ must be zero if RNAME is not specified");
-                }
-                if (!cigar.equals("*")) {
-                    reportErrorParsingLine("CIGAR must be '*' if RNAME is not specified");
-                }
-            }
-            samRecord.setAlignmentStart(pos);
-            samRecord.setMappingQuality(mapq);
-            samRecord.setCigarString(cigar);
-
-            String mateRName = mFields[MRNM_COL];
-            if (mateRName.equals("*")) {
-                if (samRecord.getReadPairedFlag() && !samRecord.getMateUnmappedFlag()) {
-                    reportErrorParsingLine("MRNM not specified but flags indicate mate mapped");
-                }
-            }
-            else {
-                if (!samRecord.getReadPairedFlag()) {
-                    reportErrorParsingLine("MRNM specified but flags indicate unpaired");
-                }
-                if (!"=".equals(mateRName)) {
-                    mateRName = SAMSequenceRecord.truncateSequenceName(mateRName);
-                }
-                validateReferenceName(mateRName, "MRNM");
-                if (mateRName.equals("=")) {
-                    if (samRecord.getReferenceName() == null) {
-                        reportErrorParsingLine("MRNM is '=', but RNAME is not set");
-                    }
-                    samRecord.setMateReferenceName(samRecord.getReferenceName());
-                } else {
-                    samRecord.setMateReferenceName(mateRName);
-                }
-            }
 
-            final int matePos = parseInt(mFields[MPOS_COL], "MPOS");
-            final int isize = parseInt(mFields[ISIZE_COL], "ISIZE");
-            if (!samRecord.getMateReferenceName().equals(SAMRecord.NO_ALIGNMENT_REFERENCE_NAME)) {
-                if (matePos == 0) {
-                    reportErrorParsingLine("MPOS must be non-zero if MRNM is specified");
-                }
-            } else {
-                if (matePos != 0) {
-                    reportErrorParsingLine("MPOS must be zero if MRNM is not specified");
-                }
-                if (isize != 0) {
-                    reportErrorParsingLine("ISIZE must be zero if MRNM is not specified");
-                }
-            }
-            samRecord.setMateAlignmentStart(matePos);
-            samRecord.setInferredInsertSize(isize);
-            if (!mFields[SEQ_COL].equals("*")) {
-                validateReadBases(mFields[SEQ_COL]);
-                samRecord.setReadString(mFields[SEQ_COL]);
-            } else {
-                samRecord.setReadBases(SAMRecord.NULL_SEQUENCE);
-            }
-            if (!mFields[QUAL_COL].equals("*")) {
-                if (samRecord.getReadBases() == SAMRecord.NULL_SEQUENCE) {
-                    reportErrorParsingLine("QUAL should not be specified if SEQ is not specified");
-                }
-                if (samRecord.getReadString().length() != mFields[QUAL_COL].length()) {
-                    reportErrorParsingLine("length(QUAL) != length(SEQ)");
-                }
-                samRecord.setBaseQualityString(mFields[QUAL_COL]);
-            } else {
-                samRecord.setBaseQualities(SAMRecord.NULL_QUALS);
-            }
-
-            for (int i = NUM_REQUIRED_FIELDS; i < numFields; ++i) {
-                parseTag(samRecord, mFields[i]);
-            }
-
-            final List<SAMValidationError> validationErrors = samRecord.isValid();
-            if (validationErrors != null) {
-                for (final SAMValidationError errorMessage : validationErrors) {
-                    reportErrorParsingLine(errorMessage.getMessage());
-                }
-            }
-            return samRecord;
+            return parser.parseLine(mCurrentLine, mReader.getLineNumber());
         }
 
-        private void validateReadBases(final String bases) {
-/*
- * Using regex is slow, so check for invalid characters via isValidReadBase(), which hopefully the JIT will optimize.
-            if (!VALID_BASES.matcher(bases).matches()) {
-                reportErrorParsingLine("Invalid character in read bases");
-            }
-*/
-            for (int i = 0; i < bases.length(); ++i) {
-                if (!isValidReadBase(bases.charAt(i))) {
-                    reportErrorParsingLine("Invalid character in read bases");
-                    return;
-                }
-            }
-        }
-        
-        private boolean isValidReadBase(final char base) {
-            switch (base) {
-                case 'a':
-                case 'c':
-                case 'm':
-                case 'g':
-                case 'r':
-                case 's':
-                case 'v':
-                case 't':
-                case 'w':
-                case 'y':
-                case 'h':
-                case 'k':
-                case 'd':
-                case 'b':
-                case 'n':
-                case 'A':
-                case 'C':
-                case 'M':
-                case 'G':
-                case 'R':
-                case 'S':
-                case 'V':
-                case 'T':
-                case 'W':
-                case 'Y':
-                case 'H':
-                case 'K':
-                case 'D':
-                case 'B':
-                case 'N':
-                case '.':
-                case '=':
-                    return true;
-                default:
-                    return false;
-            }
-        }
-
-        private void parseTag(final SAMRecord samRecord, final String tag) {
-            Map.Entry<String, Object> entry = null;
-            try {
-                entry = tagCodec.decode(tag);
-            } catch (SAMFormatException e) {
-                reportErrorParsingLine(e);
-            }
-            if (entry != null) {
-                if (entry.getValue() instanceof TagValueAndUnsignedArrayFlag) {
-                    final TagValueAndUnsignedArrayFlag valueAndFlag = (TagValueAndUnsignedArrayFlag) entry.getValue();
-                    if (valueAndFlag.isUnsignedArray) {
-                        samRecord.setUnsignedArrayAttribute(entry.getKey(), valueAndFlag.value);
-                    }
-                    else {
-                        samRecord.setAttribute(entry.getKey(), valueAndFlag.value);
-                    }
-                } else {
-                    samRecord.setAttribute(entry.getKey(), entry.getValue());
-                }
-            }
-        }
     }
 }
 
diff --git a/src/java/net/sf/samtools/SAMValidationError.java b/src/java/net/sf/samtools/SAMValidationError.java
index eec12eb..f7392d4 100644
--- a/src/java/net/sf/samtools/SAMValidationError.java
+++ b/src/java/net/sf/samtools/SAMValidationError.java
@@ -35,6 +35,9 @@ public class SAMValidationError {
     }
 
     public enum Type {
+        /** quality encodings out of range; appear to be Solexa or Illumina when Phread expected */
+        INVALID_QUALITY_FORMAT(Severity.WARNING),
+        
         /** proper pair flag set for unpaired read */
         INVALID_FLAG_PROPER_PAIR,
 
diff --git a/src/java/net/sf/samtools/util/DateParser.java b/src/java/net/sf/samtools/util/DateParser.java
index 6211252..eab68db 100644
--- a/src/java/net/sf/samtools/util/DateParser.java
+++ b/src/java/net/sf/samtools/util/DateParser.java
@@ -95,14 +95,11 @@ public class DateParser {
     private static boolean check(StringTokenizer st, String token)
             throws InvalidDateException
     {
-        try {
-            if (st.nextToken().equals(token)) {
-                return true;
-            } else {
-                throw new InvalidDateException("Missing ["+token+"]");
-            }
-        } catch (NoSuchElementException ex) {
-            return false;
+        if (!st.hasMoreElements()) return false;
+        if (st.nextToken().equals(token)) {
+            return true;
+        } else {
+            throw new InvalidDateException("Missing ["+token+"]");
         }
     }
 
diff --git a/src/java/net/sf/samtools/util/StringLineReader.java b/src/java/net/sf/samtools/util/StringLineReader.java
index 4284a1a..a7e240c 100644
--- a/src/java/net/sf/samtools/util/StringLineReader.java
+++ b/src/java/net/sf/samtools/util/StringLineReader.java
@@ -23,20 +23,24 @@
  */
 package net.sf.samtools.util;
 
+import java.util.regex.Pattern;
+
 /**
  * Implementation of LineReader that gets its input from a String.  No charset conversion
  * is necessary because the String is in unicode.  Handles CR, LF or CRLF line termination,
  * but if asked to return the line terminator, it always comes back as LF.
  */
 public class StringLineReader implements LineReader {
-
+    private static final Pattern CRLF = Pattern.compile("\r\n");
     private final String theString;
     private int curPos = 0;
     private int lineNumber = 0;
 
     public StringLineReader(final String s) {
-        // Simplify later processing by replacing crlf with just lf, and replacing solo cr with lf
-        this.theString = s.replaceAll("\r\n", "\n").replaceAll("\r", "\n");
+        // Simplify later processing by replacing crlf with just lf, and replacing solo cr with lf.
+        // Note that String.replace(String, String) causes a regex to be used, so precompilation should be
+        // the best we can do short of handling the string directly.
+        this.theString = CRLF.matcher(s).replaceAll("\n").replace('\r', '\n');
     }
 
     /**
diff --git a/src/tests/java/net/sf/picard/sam/MarkDuplicatesTest.java b/src/tests/java/net/sf/picard/sam/MarkDuplicatesTest.java
new file mode 100644
index 0000000..8cc0ea2
--- /dev/null
+++ b/src/tests/java/net/sf/picard/sam/MarkDuplicatesTest.java
@@ -0,0 +1,139 @@
+/*
+ * The MIT License
+ *
+ * Copyright (c) 2012 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+package net.sf.picard.sam;
+
+import net.sf.picard.io.IoUtil;
+import net.sf.samtools.*;
+import net.sf.samtools.util.TestUtil;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.util.*;
+
+public class MarkDuplicatesTest {
+    private static final File TEST_DATA_DIR = new File("testdata/net/sf/picard/sam/MarkDuplicates");
+
+
+    /**
+     * Test that PG header records are created & chained appropriately (or not created), and that the PG record chains
+     * are as expected.  MarkDuplicates is used both to merge and to mark dupes in this case.
+     * @param suppressPg If true, do not create PG header record.
+     * @param expectedPnVnByReadName For each read, info about the expect chain of PG records.
+     */
+    @Test(dataProvider = "pgRecordChainingTest")
+    public void pgRecordChainingTest(final boolean suppressPg,
+                                     final Map<String, List<ExpectedPnAndVn>> expectedPnVnByReadName) {
+        final File outputDir = IoUtil.createTempDir("MarkDuplicatesTest.", ".tmp");
+        outputDir.deleteOnExit();
+        try {
+            // Run MarkDuplicates, merging the 3 input files, and either enabling or suppressing PG header
+            // record creation according to suppressPg.
+            final MarkDuplicates markDuplicates = new MarkDuplicates();
+            final ArrayList<String> args = new ArrayList<String>();
+            for (int i = 1; i <= 3; ++i) {
+                args.add("INPUT=" + new File(TEST_DATA_DIR, "merge" + i + ".sam").getAbsolutePath());
+            }
+            final File outputSam = new File(outputDir, "markDuplicatesTest.sam");
+            args.add("OUTPUT=" + outputSam.getAbsolutePath());
+            args.add("METRICS_FILE=" + new File(outputDir, "markDuplicatesTest.duplicate_metrics").getAbsolutePath());
+            if (suppressPg) args.add("PROGRAM_RECORD_ID=null");
+
+            // I generally prefer to call doWork rather than invoking the argument parser, but it is necessary
+            // in this case to initialize the command line.
+            // Note that for the unit test, version won't come through because it is obtained through jar
+            // manifest, and unit test doesn't run code from a jar.
+            Assert.assertEquals(markDuplicates.instanceMain(args.toArray(new String[args.size()])), 0);
+
+            // Read the MarkDuplicates output file, and get the PG ID for each read.  In this particular test,
+            // the PG ID should be the same for both ends of a pair.
+            final SAMFileReader reader = new SAMFileReader(outputSam);
+
+            final Map<String, String> pgIdForReadName = new HashMap<String, String>();
+            for (final SAMRecord rec : reader) {
+                final String existingPgId = pgIdForReadName.get(rec.getReadName());
+                final String thisPgId = rec.getStringAttribute(SAMTag.PG.name());
+                if (existingPgId != null) {
+                    Assert.assertEquals(thisPgId, existingPgId);
+                } else {
+                    pgIdForReadName.put(rec.getReadName(), thisPgId);
+                }
+            }
+            final SAMFileHeader header = reader.getFileHeader();
+            reader.close();
+
+            // Confirm that for each read name, the chain of PG records contains exactly the number that is expected,
+            // and that values in the PG chain are as expected.
+            for (final Map.Entry<String, List<ExpectedPnAndVn>> entry : expectedPnVnByReadName.entrySet()) {
+                final String readName = entry.getKey();
+                final List<ExpectedPnAndVn> expectedList = entry.getValue();
+                String pgId = pgIdForReadName.get(readName);
+                for (final ExpectedPnAndVn expected : expectedList) {
+                    final SAMProgramRecord programRecord = header.getProgramRecord(pgId);
+                    if (expected.expectedPn != null) Assert.assertEquals(programRecord.getProgramName(), expected.expectedPn);
+                    if (expected.expectedVn != null) Assert.assertEquals(programRecord.getProgramVersion(), expected.expectedVn);
+                    pgId = programRecord.getPreviousProgramGroupId();
+                }
+                Assert.assertNull(pgId);
+            }
+
+        } finally {
+            TestUtil.recursiveDelete(outputDir);
+        }
+    }
+
+    /**
+     * Represents an expected PN value and VN value for a PG record.  If one of thexe is null, any value is allowed
+     * in the PG record being tested.
+     */
+    private static class ExpectedPnAndVn {
+        final String expectedPn;
+        final String expectedVn;
+
+        private ExpectedPnAndVn(final String expectedPn, final String expectedVn) {
+            this.expectedPn = expectedPn;
+            this.expectedVn = expectedVn;
+        }
+    }
+
+    @DataProvider(name = "pgRecordChainingTest")
+    public Object[][] pgRecordChainingTestDataProvider() {
+        // Two test cases: One in which PG record generation is enabled, the other in which it is turned off.
+        final Map<String, List<ExpectedPnAndVn>> withPgMap = new HashMap<String, List<ExpectedPnAndVn>>();
+        withPgMap.put("1AAXX.1.1", Arrays.asList(new ExpectedPnAndVn("MarkDuplicates", null), new ExpectedPnAndVn("MarkDuplicates", "1"), new ExpectedPnAndVn("bwa", "1")));
+        withPgMap.put("1AAXX.2.1", Arrays.asList(new ExpectedPnAndVn("MarkDuplicates", null), new ExpectedPnAndVn("bwa", "2")));
+        withPgMap.put("1AAXX.3.1", Arrays.asList(new ExpectedPnAndVn("MarkDuplicates", null)));
+
+
+        final Map<String, List<ExpectedPnAndVn>> suppressPgMap = new HashMap<String, List<ExpectedPnAndVn>>();
+        suppressPgMap .put("1AAXX.1.1", Arrays.asList(new ExpectedPnAndVn("MarkDuplicates", "1"), new ExpectedPnAndVn("bwa", "1")));
+        suppressPgMap .put("1AAXX.2.1", Arrays.asList(new ExpectedPnAndVn("bwa", "2")));
+        suppressPgMap .put("1AAXX.3.1", new ArrayList<ExpectedPnAndVn>(0));
+        return new Object[][] {
+                { false, withPgMap},
+                { true, suppressPgMap}
+        };
+    }
+}
diff --git a/src/tests/java/net/sf/picard/sam/ValidateSamFileTest.java b/src/tests/java/net/sf/picard/sam/ValidateSamFileTest.java
index ca85dde..0150445 100644
--- a/src/tests/java/net/sf/picard/sam/ValidateSamFileTest.java
+++ b/src/tests/java/net/sf/picard/sam/ValidateSamFileTest.java
@@ -284,6 +284,13 @@ public class ValidateSamFileTest {
     }
 
     @Test
+    public void testQualityFormatValidation() throws Exception {
+        final SAMFileReader samReader = new SAMFileReader(new File("./testdata/net/sf/picard/util/QualityEncodingDetectorTest/illumina-as-standard.bam"));
+        final Histogram<String> results = executeValidation(samReader, null);
+        Assert.assertEquals(results.get(SAMValidationError.Type.INVALID_QUALITY_FORMAT.getHistogramString()).getValue(), 1.0);
+    }
+    
+    @Test
     public void testCigarOffEndOfReferenceValidation() throws Exception {
         final SAMRecordSetBuilder samBuilder = new SAMRecordSetBuilder();
         samBuilder.addFrag(String.valueOf(0), 0, 1, false);
diff --git a/src/tests/java/net/sf/picard/util/QualityEncodingDetectorTest.java b/src/tests/java/net/sf/picard/util/QualityEncodingDetectorTest.java
new file mode 100644
index 0000000..996d4af
--- /dev/null
+++ b/src/tests/java/net/sf/picard/util/QualityEncodingDetectorTest.java
@@ -0,0 +1,71 @@
+package net.sf.picard.util;
+
+import net.sf.picard.fastq.FastqReader;
+import net.sf.samtools.SAMFileReader;
+import org.testng.Assert;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.File;
+import java.util.Arrays;
+import java.util.List;
+
+public class QualityEncodingDetectorTest {
+
+    private static class Testcase {
+        private final File f;
+        private final FastqQualityFormat q;
+
+        Testcase(final File file, final FastqQualityFormat qualityFormat) {
+            this.f = file;
+            this.q = qualityFormat;
+        }
+    }
+
+    final static List<Testcase> FASTQ_TESTCASES = Arrays.asList(
+            // Need to use full-range quality here, as Solexa and Illumina are near indistinguishable
+            new Testcase(new File("./testdata/net/sf/picard/sam/fastq2bam/fastq-solexa/solexa_full_range_as_solexa.fastq"), FastqQualityFormat.Solexa),
+            new Testcase(new File("./testdata/net/sf/picard/sam/fastq2bam/fastq-illumina/s_1_sequence.txt"), FastqQualityFormat.Illumina),
+            new Testcase(new File("./testdata/net/sf/picard/sam/fastq2bam/fastq-sanger/5k-30BB2AAXX.3.aligned.sam.fastq"), FastqQualityFormat.Standard)
+    );
+    final static List<Testcase> BAM_TESTCASES = Arrays.asList(
+            new Testcase(new File("./testdata/net/sf/picard/sam/unmapped.sam"), FastqQualityFormat.Standard),
+            new Testcase(new File("./testdata/net/sf/samtools/BAMFileIndexTest/index_test.bam"), FastqQualityFormat.Standard),
+            new Testcase(new File("./testdata/net/sf/picard/util/QualityEncodingDetectorTest/solexa-as-standard.bam"), FastqQualityFormat.Solexa),
+            new Testcase(new File("./testdata/net/sf/picard/util/QualityEncodingDetectorTest/illumina-as-standard.bam"), FastqQualityFormat.Illumina)
+            
+    );
+
+    Object[][] renderObjectArrayArray(final List<Testcase> testcaseList) {
+        final Object[][] data = new Object[testcaseList.size()][];
+        for (int i = 0; i < data.length; i++) {
+            final Testcase testcase = testcaseList.get(i);
+            data[i] = new Object[]{testcase.f, testcase.q};
+        }
+        return data;
+    }
+
+    @DataProvider(name = "BAM_TESTCASES")
+    Object[][] bamTestcases() {
+        return renderObjectArrayArray(BAM_TESTCASES);
+    }
+
+    @DataProvider(name = "FASTQ_TESTCASES")
+    Object[][] fastqTestcases() {
+        return renderObjectArrayArray(FASTQ_TESTCASES);
+    }
+
+    @Test(dataProvider = "FASTQ_TESTCASES", groups = {"unix"})
+    public void testFastqQualityInference(final File input, final FastqQualityFormat expectedQualityFormat) {
+        final FastqReader reader = new FastqReader(input);
+        Assert.assertEquals(QualityEncodingDetector.detect(reader), expectedQualityFormat);
+        reader.close();
+    }
+
+    @Test(dataProvider = "BAM_TESTCASES", groups = {"unix"})
+    public void testBamQualityInference(final File input, final FastqQualityFormat expectedQualityFormat) {
+        final SAMFileReader reader = new SAMFileReader(input);
+        Assert.assertEquals(QualityEncodingDetector.detect(reader), expectedQualityFormat);
+        reader.close();
+    }
+}
diff --git a/testdata/net/sf/picard/sam/MarkDuplicates/merge1.sam b/testdata/net/sf/picard/sam/MarkDuplicates/merge1.sam
new file mode 100644
index 0000000..7e1c583
--- /dev/null
+++ b/testdata/net/sf/picard/sam/MarkDuplicates/merge1.sam
@@ -0,0 +1,14 @@
+ at HD	VN:1.0	SO:coordinate
+ at SQ	SN:chr1	LN:101
+ at SQ	SN:chr2	LN:101
+ at SQ	SN:chr3	LN:101
+ at SQ	SN:chr4	LN:101
+ at SQ	SN:chr5	LN:101
+ at SQ	SN:chr6	LN:101
+ at SQ	SN:chr7	LN:404
+ at SQ	SN:chr8	LN:202
+ at RG	ID:1AAXX.1	SM:Hi,Mom!	LB:mylib
+ at PG	ID:MarkDuplicates	PN:MarkDuplicates	VN:1	CL:MarkDuplicates merge1.sam	PP:bwa
+ at PG	ID:bwa	PN:bwa	VN:1	CL:bwa aln
+1AAXX.1.1	83	chr7	1	255	101M	=	302	201	CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN	)'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/&	RG:Z:1AAXX.1	PG:Z:MarkDuplicates
+1AAXX.1.1	163	chr7	302	255	101M	=	1	-201	NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA	&/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1	RG:Z:1AAXX.1	PG:Z:MarkDuplicates
diff --git a/testdata/net/sf/picard/sam/MarkDuplicates/merge2.sam b/testdata/net/sf/picard/sam/MarkDuplicates/merge2.sam
new file mode 100644
index 0000000..10f4ed3
--- /dev/null
+++ b/testdata/net/sf/picard/sam/MarkDuplicates/merge2.sam
@@ -0,0 +1,13 @@
+ at HD	VN:1.0	SO:coordinate
+ at SQ	SN:chr1	LN:101
+ at SQ	SN:chr2	LN:101
+ at SQ	SN:chr3	LN:101
+ at SQ	SN:chr4	LN:101
+ at SQ	SN:chr5	LN:101
+ at SQ	SN:chr6	LN:101
+ at SQ	SN:chr7	LN:404
+ at SQ	SN:chr8	LN:202
+ at RG	ID:1AAXX.2	SM:Hi,Mom!	LB:mylib
+ at PG	ID:bwa	PN:bwa	VN:2	CL:bwa aln
+1AAXX.2.1	83	chr7	1	255	101M	=	302	201	CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN	)'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/&	RG:Z:1AAXX.2	PG:Z:bwa
+1AAXX.2.1	163	chr7	302	255	101M	=	1	-201	NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA	&/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1	RG:Z:1AAXX.2	PG:Z:bwa
diff --git a/testdata/net/sf/picard/sam/MarkDuplicates/merge3.sam b/testdata/net/sf/picard/sam/MarkDuplicates/merge3.sam
new file mode 100644
index 0000000..05cdbe1
--- /dev/null
+++ b/testdata/net/sf/picard/sam/MarkDuplicates/merge3.sam
@@ -0,0 +1,13 @@
+ at HD	VN:1.0	SO:coordinate
+ at SQ	SN:chr1	LN:101
+ at SQ	SN:chr2	LN:101
+ at SQ	SN:chr3	LN:101
+ at SQ	SN:chr4	LN:101
+ at SQ	SN:chr5	LN:101
+ at SQ	SN:chr6	LN:101
+ at SQ	SN:chr7	LN:404
+ at SQ	SN:chr8	LN:202
+ at RG	ID:1AAXX.3	SM:Hi,Mom!	LB:mylib
+ at PG	ID:bwa	PN:bwa	VN:3	CL:bwa aln
+1AAXX.3.1	83	chr7	1	255	101M	=	302	201	CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN	)'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/&	RG:Z:1AAXX.3
+1AAXX.3.1	163	chr7	302	255	101M	=	1	-201	NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA	&/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1	RG:Z:1AAXX.3
diff --git a/testdata/net/sf/picard/util/QualityEncodingDetectorTest/illumina-as-standard.bam b/testdata/net/sf/picard/util/QualityEncodingDetectorTest/illumina-as-standard.bam
new file mode 100644
index 0000000..a98d864
Binary files /dev/null and b/testdata/net/sf/picard/util/QualityEncodingDetectorTest/illumina-as-standard.bam differ
diff --git a/testdata/net/sf/picard/util/QualityEncodingDetectorTest/solexa-as-standard.bam b/testdata/net/sf/picard/util/QualityEncodingDetectorTest/solexa-as-standard.bam
new file mode 100644
index 0000000..7235dfa
Binary files /dev/null and b/testdata/net/sf/picard/util/QualityEncodingDetectorTest/solexa-as-standard.bam differ

-- 
manipulate SAM and BAM files



More information about the debian-med-commit mailing list