[med-svn] [mash] 01/05: Imported Upstream version 1.1.1

Sascha Steinbiss satta at debian.org
Sat Aug 27 09:18:17 UTC 2016


This is an automated email from the git hooks/post-receive script.

satta pushed a commit to branch master
in repository mash.

commit 9aed75a8f2e565696d395511fea8a2c2b5a5bc3a
Author: Sascha Steinbiss <satta at debian.org>
Date:   Sat Aug 27 08:04:37 2016 +0000

    Imported Upstream version 1.1.1
---
 Makefile.in                       |  2 +-
 doc/sphinx/index.rst              |  7 +--
 doc/sphinx/tutorials.rst          | 18 +++++--
 src/mash/Command.cpp              | 17 ++++---
 src/mash/CommandFind.cpp          | 18 ++++---
 src/mash/CommandInfo.cpp          | 98 +++++++++++++++++++++++++++++++++++++--
 src/mash/CommandInfo.h            |  1 +
 src/mash/Sketch.cpp               | 47 +++++++------------
 src/mash/Sketch.h                 |  3 +-
 src/mash/hash.cpp                 |  2 +-
 src/mash/mash.cpp                 |  3 ++
 src/mash/sketchParameterSetup.cpp |  7 ++-
 src/mash/version.h                |  2 +-
 13 files changed, 163 insertions(+), 62 deletions(-)

diff --git a/Makefile.in b/Makefile.in
index aa10182..7b44de6 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -53,7 +53,7 @@ src/mash/memcpyWrap.o : src/mash/memcpyWrap.c
 	$(CC) $(CFLAGS) -c -o $@ $<
 
 src/mash/capnp/MinHash.capnp.c++ src/mash/capnp/MinHash.capnp.h : src/mash/capnp/MinHash.capnp
-	cd src/mash/capnp;@capnp@/bin/capnp compile -oc++ MinHash.capnp
+	cd src/mash/capnp;export PATH=@capnp@/bin/:${PATH};capnp compile -I @capnp@/include -oc++ MinHash.capnp
 
 install : mash
 	mkdir -p @prefix@/bin/
diff --git a/doc/sphinx/index.rst b/doc/sphinx/index.rst
index 9f10930..a963dee 100644
--- a/doc/sphinx/index.rst
+++ b/doc/sphinx/index.rst
@@ -13,12 +13,13 @@ Fast genome and metagenome distance estimation using MinHash
 
 |
 
-Preprint
+Publication
 ===========
-`"Fast genome and metagenome distance estimation using MinHash." Ondov et al. <http://biorxiv.org/content/early/2015/10/26/029827>`_
+`Mash: fast genome and metagenome distance estimation using MinHash. Ondov BD, Treangen TJ, Melsted P, Mallonee AB, Bergman NH, Koren S, Phillippy AM. Genome Biol. 2016 Jun 20;17(1):132. doi: 10.1186/s13059-016-0997-x. <http://genomebiology.biomedcentral.com/articles/10.1186/s13059-016-0997-x>`_
 
 .. toctree::
-
+   :maxdepth: 1
+   
    data
 
 Downloads
diff --git a/doc/sphinx/tutorials.rst b/doc/sphinx/tutorials.rst
index cbee821..f188019 100644
--- a/doc/sphinx/tutorials.rst
+++ b/doc/sphinx/tutorials.rst
@@ -59,19 +59,27 @@ reference (which there are two of in the sketch file):
 Querying read sets against an existing RefSeq sketch
 ----------------------------------------------------
 
-Download and gunzip the pre-sketched RefSeq archive:
+Download and gunzip the pre-sketched RefSeq archive (the reads to be queried are
+not provided here; 10x-100x coverage of a single genome with any sequencing
+technology should work):
 
 .. download::
 
 `RefSeqSketches.msh.gz <http://gembox.cbcb.umd.edu/mash/RefSeqSketches.msh.gz>`_
 
-Sketch the reads (not provided here; 10x-100x coverage of a single bacterial genome
-with any sequencing technology should work), using :code:`-u` to improve results
-by filtering unique k-mers:
+Concatenate paired ends (this could also be piped to :code:`mash` to save space by
+specifying :code:`-` for standard input, zipped or unzipped):
 
 .. code::
 
-  mash sketch -u reads.fastq
+ cat reads_1.fastq reads_2.fastq > reads.fastq
+ 
+Sketch the reads, using :code:`-m 2` to improve results
+by ignoring single-copy k-mers, which are more likely to be erroneous:
+
+.. code::
+
+  mash sketch -m 2 reads.fastq
 
 Run :code:`mash dist` with the RefSeq archive as the reference and the read
 sketch as the query:
diff --git a/src/mash/Command.cpp b/src/mash/Command.cpp
index 0092b77..f4f7504 100644
--- a/src/mash/Command.cpp
+++ b/src/mash/Command.cpp
@@ -158,19 +158,19 @@ Command::Command()
 {
     addAvailableOption("help", Option(Option::Boolean, "h", "", "Help", ""));
     addAvailableOption("kmer", Option(Option::Integer, "k", "Sketch", "K-mer size. Hashes will be based on strings of this many nucleotides. Canonical nucleotides are used by default (see Alphabet options below).", "21", 1, 32));
-    addAvailableOption("windowed", Option(Option::Boolean, "w", "Sketch", "Windowed", ""));
-    addAvailableOption("window", Option(Option::Integer, "l", "Sketch", "Window length. Hashes that are minima in any window of this size will be stored.", "10000"));
+    addAvailableOption("windowed", Option(Option::Boolean, "W", "Sketch", "Windowed", ""));
+    addAvailableOption("window", Option(Option::Integer, "L", "Window", "Window length. Hashes that are minima in any window of this size will be stored.", "10000"));
     //addAvailableOption("error", Option(Option::Number, "e", "Sketch", "Error bound. The (maximum) number of min-hashes in each sketch will be one divided by this number squared.", "0.05"));
     addAvailableOption("sketchSize", Option(Option::Integer, "s", "Sketch", "Sketch size. Each sketch will have at most this many non-redundant min-hashes.", "1000"));
     addAvailableOption("verbose", Option(Option::Boolean, "v", "Output", "Verbose", ""));
     addAvailableOption("silent", Option(Option::Boolean, "s", "Output", "Silent", ""));
-    addAvailableOption("individual", Option(Option::Boolean, "i", "Sketch", "Sketch individual sequences, rather than whole files.", ""));
+    addAvailableOption("individual", Option(Option::Boolean, "i", "Sketch", "Sketch individual sequences, rather than whole files, e.g. for multi-fastas of single-chromosome genomes or pair-wise gene comparisons.", ""));
     addAvailableOption("warning", Option(Option::Number, "w", "Sketch", "Probability threshold for warning about low k-mer size.", "0.01", 0, 1));
     addAvailableOption("reads", Option(Option::Boolean, "r", "Sketch", "Input is a read set. See Reads options below. Incompatible with -i.", ""));
     addAvailableOption("memory", Option(Option::Size, "b", "Reads", "Use a Bloom filter of this size (raw bytes or with K/M/G/T) to filter out unique k-mers. This is useful if exact filtering with -m uses too much memory. However, some unique k-mers may pass erroneously, and copies cannot be counted beyond 2. Implies -r."));
     addAvailableOption("minCov", Option(Option::Integer, "m", "Reads", "Minimum copies of each k-mer required to pass noise filter for reads. Implies -r.", "1"));
     addAvailableOption("targetCov", Option(Option::Number, "c", "Reads", "Target coverage. Sketching will conclude if this coverage is reached before the end of the input file (estimated by average k-mer multiplicity). Implies -r."));
-    addAvailableOption("genome", Option(Option::Size, "g", "Reads", "Genome size. If specified, will be used for p-value calculation instead of an estimated size from k-mer content. Implies -r."));
+    addAvailableOption("genome", Option(Option::Size, "g", "Reads", "Genome size (raw bases or with K/M/G/T). If specified, will be used for p-value calculation instead of an estimated size from k-mer content. Implies -r."));
     addAvailableOption("noncanonical", Option(Option::Boolean, "n", "Alphabet", "Preserve strand (by default, strand is ignored by using canonical DNA k-mers, which are alphabetical minima of forward-reverse pairs). Implied if an alphabet is specified with -a or -z.", ""));
     addAvailableOption("protein", Option(Option::Boolean, "a", "Alphabet", "Use amino acid alphabet (A-Z, except BJOUXZ). Implies -n, -k 9.", ""));
     addAvailableOption("alphabet", Option(Option::String, "z", "Alphabet", "Alphabet to base hashes on (case ignored by default; see -Z). K-mers with other characters will be ignored. Implies -n.", ""));
@@ -179,11 +179,13 @@ Command::Command()
     addAvailableOption("pacbio", Option(Option::Boolean, "pacbio", "", "Use default settings for PacBio sequences.", ""));
     addAvailableOption("illumina", Option(Option::Boolean, "illumina", "", "Use default settings for Illumina sequences.", ""));
     addAvailableOption("nanopore", Option(Option::Boolean, "nanopore", "", "Use default settings for Oxford Nanopore sequences.", ""));
+    addAvailableOption("factor", Option(Option::Number, "f", "Window", "Compression factor", "100"));
     
     addCategory("", "");
     addCategory("Input", "Input");
     addCategory("Output", "Output");
     addCategory("Sketch", "Sketching");
+    addCategory("Window", "Sketching (windowed)");
     addCategory("Reads", "Sketching (reads)");
     addCategory("Alphabet", "Sketching (alphabet)");
 }
@@ -348,8 +350,11 @@ void Command::useSketchOptions()
     useOption("protein");
     useOption("alphabet");
     useOption("case");
-    //useOption("windowed");
-    //useOption("window");
+#ifdef COMMAND_FIND
+    useOption("windowed");
+    useOption("window");
+    useOption("factor");
+#endif
     useOption("sketchSize");
     useOption("individual");
     useOption("warning");
diff --git a/src/mash/CommandFind.cpp b/src/mash/CommandFind.cpp
index 5d0311a..4004c8d 100644
--- a/src/mash/CommandFind.cpp
+++ b/src/mash/CommandFind.cpp
@@ -12,6 +12,7 @@
 #include <set>
 #include <unordered_set>
 #include "ThreadPool.h"
+#include "sketchParameterSetup.h"
 
 using namespace::std;
 
@@ -29,10 +30,7 @@ CommandFind::CommandFind()
     addOption("threshold", Option(Option::Number, "t", "Output", "Threshold. This fraction of the query sequence's min-hashes must appear in a query-sized window of a reference sequence for the match to be reported.", "0.2", 0.0, 1.0));
     addOption("best", Option(Option::Integer, "b", "Output", "Best hit count. This many of the best hits will be reported (0 to report all hits). Score ties are broken by keeping the hit to the earlier reference or to the left-most position.", "0"));
     addOption("self", Option(Option::Boolean, "self", "Output", "Ignore self matches if query ID appears in reference.", ""));
-    useOption("kmer");
-    useOption("window");
-    useOption("factor");
-    useOption("threads");
+    useSketchOptions();
 }
 
 int CommandFind::run() const
@@ -50,6 +48,12 @@ int CommandFind::run() const
     
 	Sketch::Parameters params;
 	
+    if ( sketchParameterSetup(params, *(Command *)this) )
+    {
+    	return 1;
+    }
+    
+    params.windowed = true;
     Sketch sketch;
     const string & fileReference = arguments[0];
     
@@ -61,9 +65,9 @@ int CommandFind::run() const
     
     if ( hasSuffix(fileReference, suffixSketchWindowed) )
     {
-        if ( options.at("kmer").active || options.at("minsWindowed").active || options.at("window").active )
+        if ( options.at("kmer").active || options.at("sketchSize").active || options.at("window").active )
         {
-            cerr << "ERROR: The options " << options.at("kmer").identifier << ", " << options.at("minsWindowed").identifier << " and " << options.at("window").identifier << " cannot be used when a sketch is provided; these are inherited from the sketch.\n";
+            cerr << "ERROR: The options " << options.at("kmer").identifier << ", " << options.at("sketchSize").identifier << " and " << options.at("window").identifier << " cannot be used when a sketch is provided; these are inherited from the sketch.\n";
             return 1;
         }
     }
@@ -275,6 +279,7 @@ void findPerStrand(const CommandFind::FindInput * input, CommandFind::FindOutput
     params.minHashesPerWindow = mins;
     params.windowed = true;
     params.windowSize = windowSize;
+    params.use64 = true;
     
     getMinHashPositions(positionHashes, seq, length, params);
     //
@@ -296,7 +301,6 @@ void findPerStrand(const CommandFind::FindInput * input, CommandFind::FindOutput
     for ( Sketch::Hash_set::const_iterator i = minHashes.begin(); i != minHashes.end(); i++ )
     {
         Sketch::hash_t hash = *i;
-        //cout << "Hash " << hash << endl;
         
         if ( sketch.hasLociByHash(hash) )
         {
diff --git a/src/mash/CommandInfo.cpp b/src/mash/CommandInfo.cpp
index e17f1b1..1e5fc2c 100644
--- a/src/mash/CommandInfo.cpp
+++ b/src/mash/CommandInfo.cpp
@@ -19,9 +19,10 @@ CommandInfo::CommandInfo()
     argumentString = "<sketch>";
     
     useOption("help");
-    addOption("header", Option(Option::Boolean, "H", "", "Only show header info. Do not list each sketch. Incompatible with -t and -c.", ""));
-    addOption("tabular", Option(Option::Boolean, "t", "", "Tabular output (rather than padded), with no header. Incompatible with -H and -c.", ""));
-    addOption("counts", Option(Option::Boolean, "c", "", "Show hash count histograms for each sketch. Incompatible with -H and -t.", ""));
+    addOption("header", Option(Option::Boolean, "H", "", "Only show header info. Do not list each sketch. Incompatible with -d, -t and -c.", ""));
+    addOption("tabular", Option(Option::Boolean, "t", "", "Tabular output (rather than padded), with no header. Incompatible with -d, -H and -c.", ""));
+    addOption("counts", Option(Option::Boolean, "c", "", "Show hash count histograms for each sketch. Incompatible with -d, -H and -t.", ""));
+    addOption("dump", Option(Option::Boolean, "d", "", "Dump sketches in JSON format. Incompatible with -H, -t, and -c.", ""));
 }
 
 int CommandInfo::run() const
@@ -35,6 +36,7 @@ int CommandInfo::run() const
     bool header = options.at("header").active;
     bool tabular = options.at("tabular").active;
     bool counts = options.at("counts").active;
+    bool dump = options.at("dump").active;
     
     if ( header && tabular )
     {
@@ -54,6 +56,27 @@ int CommandInfo::run() const
     	return 1;
     }
     
+	if ( dump )
+	{
+		if ( tabular )
+		{
+			cerr << "ERROR: The options -d and -t are incompatible." << endl;
+			return 1;
+		}
+	
+		if ( header )
+		{
+			cerr << "ERROR: The options -d and -H are incompatible." << endl;
+			return 1;
+		}
+	
+		if ( counts )
+		{
+			cerr << "ERROR: The options -d and -c are incompatible." << endl;
+			return 1;
+		}
+	}
+    
     const string & file = arguments[0];
     
     if ( ! hasSuffix(file, suffixSketch) )
@@ -72,6 +95,10 @@ int CommandInfo::run() const
     {
     	return printCounts(sketch);
     }
+    else if ( dump )
+    {
+		return writeJson(sketch);
+    }
     
     if ( tabular )
     {
@@ -166,3 +193,68 @@ int CommandInfo::printCounts(const Sketch & sketch) const
 	
 	return 0;
 }
+
+int CommandInfo::writeJson(const Sketch & sketch) const
+{
+	string alphabet;
+	sketch.getAlphabetAsString(alphabet);
+	bool use64 = sketch.getUse64();
+	
+#ifdef ARCH_32
+	#define HASH "MurmurHash3_x86_32"
+#else
+	#define HASH "MurmurHash3_x64_128"
+#endif
+	
+	cout << "{" << endl;
+	cout << "	\"kmer\" : " << sketch.getKmerSize() << ',' << endl;
+	cout << "	\"alphabet\" : \"" << alphabet << "\"," << endl;
+	cout << "	\"preserveCase\" : " << (sketch.getPreserveCase() ? "true" : "false") << ',' << endl;
+	cout << "	\"canonical\" : " << (sketch.getNoncanonical() ? "false" : "true") << ',' << endl;
+	cout << "	\"sketchSize\" : " << sketch.getMinHashesPerWindow() << ',' << endl;
+	cout << "	\"hashType\" : \"" << HASH << "\"," << endl;
+	cout << "	\"hashBits\" : " << (use64 ? 64 : 32) << ',' << endl;
+	cout << "	\"hashSeed\" : " << seed << ',' << endl;
+	cout << " 	\"sketches\" :" << endl;
+	cout << "	[" << endl;
+	
+	for ( uint64_t i = 0; i < sketch.getReferenceCount(); i++ )
+	{
+		const Sketch::Reference & ref = sketch.getReference(i);
+		
+		cout << "		{" << endl;
+		cout << "			\"name\" : \"" << ref.name << "\"," << endl;
+		cout << "			\"length\" : " << ref.length << ',' << endl;
+		cout << "			\"comment\" : \"" << ref.comment << "\"," << endl;
+		cout << "			\"hashes\" :" << endl;
+		cout << "			[" << endl;
+		
+		for ( int j = 0; j < ref.hashesSorted.size(); j++ )
+		{
+			cout << "				" << ( use64 ? ref.hashesSorted.at(j).hash64 : ref.hashesSorted.at(j).hash32 );
+			
+			if ( j < ref.hashesSorted.size() - 1 )
+			{
+				cout << ',';
+			}
+			
+			cout << endl;
+		}
+		
+		cout << "			]" << endl;
+		
+		if ( i < sketch.getReferenceCount() - 1 )
+		{
+			cout << "		}," << endl;
+		}
+		else
+		{
+			cout << "		}" << endl;
+		}
+	}
+	
+	cout << "	]" << endl;
+	cout << "}" << endl;
+	
+	return 0;
+}
diff --git a/src/mash/CommandInfo.h b/src/mash/CommandInfo.h
index 5743e0b..742d95a 100644
--- a/src/mash/CommandInfo.h
+++ b/src/mash/CommandInfo.h
@@ -21,6 +21,7 @@ public:
 private:
 	
 	int printCounts(const Sketch & sketch) const;
+	int writeJson(const Sketch & sketch) const;
 };
 
 #endif
diff --git a/src/mash/Sketch.cpp b/src/mash/Sketch.cpp
index 5d6ac0e..ef433ee 100644
--- a/src/mash/Sketch.cpp
+++ b/src/mash/Sketch.cpp
@@ -99,7 +99,7 @@ int Sketch::initFromFiles(const vector<string> & files, const Parameters & param
 	
     for ( int i = 0; i < files.size(); i++ )
     {
-        bool isSketch = hasSuffix(files[i], suffixSketch);
+        bool isSketch = hasSuffix(files[i], parameters.windowed ? suffixSketchWindowed : suffixSketch);
         
         if ( isSketch )
         {
@@ -124,7 +124,7 @@ int Sketch::initFromFiles(const vector<string> & files, const Parameters & param
             	cerr << "\nWARNING: The sketch file " << files[i] << " has different alphabet (" << alphabetTest << ") than the current alphabet (" << alphabet << "). This file will be skipped." << endl << endl;
             	continue;
             }
-		
+			
 			if ( sketchTest.getKmerSize() != parameters.kmerSize )
 			{
 				cerr << "\nWARNING: The sketch " << files[i] << " has a kmer size (" << sketchTest.getKmerSize() << ") that does not match the current kmer size (" << parameters.kmerSize << "). This file will be skipped." << endl << endl;
@@ -183,7 +183,10 @@ int Sketch::initFromFiles(const vector<string> & files, const Parameters & param
 		
 			if ( parameters.concatenated )
 			{
-				fclose(inStream);
+				if ( files[i] != "-" )
+				{
+					fclose(inStream);
+				}
 			
 				threadPool.runWhenThreadAvailable(new SketchInput(files[i], 0, 0, "", "", parameters), sketchFile);
 			}
@@ -290,11 +293,6 @@ bool Sketch::sketchFileBySequence(FILE * file, ThreadPool<Sketch::SketchInput, S
 			continue;
 		}
 		
-		if ( parameters.windowed )
-		{
-			positionHashesByReference.resize(count + 1);
-		}
-		
 		//if ( verbosity > 0 && parameters.windowed ) cout << '>' << seq->name.s << " (" << l << "nt)" << endl << endl;
 		//if (seq->comment.l) printf("comment: %s\n", seq->comment.s);
 		//printf("seq: %s\n", seq->seq.s);
@@ -996,7 +994,7 @@ Sketch::SketchOutput * loadCapnp(Sketch::SketchInput * input)
     for ( uint64_t i = 0; i < lociReader.size(); i++ )
     {
         capnp::MinHash::LocusList::Locus::Reader locusReader = lociReader[i];
-        //cout << locusReader.getHash() << '\t' << locusReader.getSequence() << '\t' << locusReader.getPosition() << endl;
+        //cout << locusReader.getHash64() << '\t' << locusReader.getSequence() << '\t' << locusReader.getPosition() << endl;
         output->positionHashesByReference[locusReader.getSequence()].push_back(Sketch::PositionHash(locusReader.getPosition(), locusReader.getHash64()));
     }
     
@@ -1152,11 +1150,6 @@ Sketch::SketchOutput * sketchFile(Sketch::SketchInput * input)
 		
 		count++;
 		
-		if ( parameters.windowed )
-		{
-			// TODO positionHashesByReference.resize(count + 1);
-		}
-		
 		//if ( verbosity > 0 && parameters.windowed ) cout << '>' << seq->name.s << " (" << l << "nt)" << endl << endl;
 		//if (seq->comment.l) printf("comment: %s\n", seq->comment.s);
 		//printf("seq: %s\n", seq->seq.s);
@@ -1172,19 +1165,12 @@ Sketch::SketchOutput * sketchFile(Sketch::SketchInput * input)
 			}
 		}
 		
-		if ( parameters.windowed )
-		{
-			// TODO getMinHashPositions(positionHashesByReference[count], seq->seq.s, l, parameters, verbosity);
-		}
-		else
+		addMinHashes(minHashHeap, seq->seq.s, l, parameters);
+		
+		if ( parameters.reads && parameters.targetCov > 0 && minHashHeap.estimateMultiplicity() >= parameters.targetCov )
 		{
-	        addMinHashes(minHashHeap, seq->seq.s, l, parameters);
-	        
-	        if ( parameters.reads && parameters.targetCov > 0 && minHashHeap.estimateMultiplicity() >= parameters.targetCov )
-	        {
-	        	l = -1; // success code
-	        	break;
-	        }
+			l = -1; // success code
+			break;
 		}
 	}
 	
@@ -1252,8 +1238,6 @@ Sketch::SketchOutput * sketchSequence(Sketch::SketchInput * input)
 	output->references.resize(1);
 	Sketch::Reference & reference = output->references[0];
 	
-    MinHashHeap minHashHeap(parameters.use64, parameters.minHashesPerWindow, parameters.reads ? parameters.minCov : 1);
-
 	reference.length = input->length;
 	reference.name = input->name;
 	reference.comment = input->comment;
@@ -1261,15 +1245,16 @@ Sketch::SketchOutput * sketchSequence(Sketch::SketchInput * input)
 	
 	if ( parameters.windowed )
 	{
-		// TODO getMinHashPositions(positionHashesByReference[count], input->seq, l, parameters, verbosity);
+		output->positionHashesByReference.resize(1);
+		getMinHashPositions(output->positionHashesByReference[0], input->seq, input->length, parameters, 0);
 	}
 	else
 	{
+	    MinHashHeap minHashHeap(parameters.use64, parameters.minHashesPerWindow, parameters.reads ? parameters.minCov : 1);
         addMinHashes(minHashHeap, input->seq, input->length, parameters);
+		setMinHashesForReference(reference, minHashHeap);
 	}
 	
-	setMinHashesForReference(reference, minHashHeap);
-	
 	return output;
 }
 
diff --git a/src/mash/Sketch.h b/src/mash/Sketch.h
index 83bf5a9..a756169 100644
--- a/src/mash/Sketch.h
+++ b/src/mash/Sketch.h
@@ -102,7 +102,7 @@ public:
     
     struct PositionHash
     {
-        PositionHash(uint32_t positionNew, uint32_t hashNew) :
+        PositionHash(uint32_t positionNew, hash_t hashNew) :
             position(positionNew),
             hash(hashNew)
             {}
@@ -123,7 +123,6 @@ public:
         uint32_t position;
     };
     
-    typedef std::unordered_map < hash_t, std::vector<Sketch::PositionHash> > LociByHash_umap;
     typedef std::unordered_set<hash_t> Hash_set;
     
     struct Reference
diff --git a/src/mash/hash.cpp b/src/mash/hash.cpp
index 3dd0a18..36aaa1f 100644
--- a/src/mash/hash.cpp
+++ b/src/mash/hash.cpp
@@ -21,7 +21,7 @@ hash_u getHash(const char * seq, int length, bool use64)
 #else
     char data[16];
     MurmurHash3_x64_128(seq, length, seed, data);
-#endif    
+#endif
     
     hash_u hash;
     
diff --git a/src/mash/mash.cpp b/src/mash/mash.cpp
index 364fcf7..564e474 100644
--- a/src/mash/mash.cpp
+++ b/src/mash/mash.cpp
@@ -25,6 +25,9 @@ int main(int argc, const char ** argv)
 #ifdef COMMAND_WITHIN
     commandList.addCommand(new CommandContain());
 #endif
+#ifdef COMMAND_FIND
+	commandList.addCommand(new CommandFind());
+#endif
     commandList.addCommand(new CommandInfo());
     commandList.addCommand(new CommandPaste());
     commandList.addCommand(new CommandBounds());
diff --git a/src/mash/sketchParameterSetup.cpp b/src/mash/sketchParameterSetup.cpp
index 9928b4e..e161fd1 100644
--- a/src/mash/sketchParameterSetup.cpp
+++ b/src/mash/sketchParameterSetup.cpp
@@ -18,8 +18,11 @@ int sketchParameterSetup(Sketch::Parameters & parameters, const Command & comman
     parameters.reads = command.getOption("reads").active;
     parameters.minCov = command.getOption("minCov").getArgumentAsNumber();
     parameters.targetCov = command.getOption("targetCov").getArgumentAsNumber();
-    parameters.windowed = false;//command.getOption("windowed").active;
-    parameters.windowSize = 0;//command.getOption("window").getArgumentAsNumber();
+#ifdef COMMAND_FIND
+    parameters.windowed = command.getOption("windowed").active;
+    parameters.windowSize = command.getOption("window").getArgumentAsNumber();
+    parameters.concatenated = false;
+#endif
     parameters.parallelism = command.getOption("threads").getArgumentAsNumber();
     parameters.preserveCase = command.getOption("case").active;
     
diff --git a/src/mash/version.h b/src/mash/version.h
index c87b312..7bd7fdf 100644
--- a/src/mash/version.h
+++ b/src/mash/version.h
@@ -4,4 +4,4 @@
 //
 // See the LICENSE.txt file included with this software for license information.
 
-static const char * version = "1.1";
+static const char * version = "1.1.1";

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/mash.git



More information about the debian-med-commit mailing list