[med-svn] [iqtree] 01/03: Imported Upstream version 1.4.2+dfsg

Andreas Tille tille at debian.org
Fri Jun 24 20:13:03 UTC 2016


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository iqtree.

commit a68a042d6b808c14b5aa4ff28daa98c9afba848a
Author: Andreas Tille <tille at debian.org>
Date:   Fri Jun 24 21:45:20 2016 +0200

    Imported Upstream version 1.4.2+dfsg
---
 CMakeLists.txt            |   2 +-
 alignment.cpp             |   4 +-
 alignment.h               |   4 +-
 iqtree.cpp                | 114 +++++----
 model/modelfactory.cpp    | 124 +++++++++-
 model/modelfactory.h      |   8 +
 model/modelgtr.cpp        |   1 +
 model/partitionmodel.cpp  |  33 +++
 model/partitionmodel.h    |   8 +
 model/ratefree.cpp        |   1 +
 model/ratefree.h          |   2 +-
 model/ratefreeinvar.cpp   |   2 +-
 model/rategamma.cpp       |   1 +
 model/rategamma.h         |   2 +-
 model/rategammainvar.cpp  |  36 ++-
 model/rategammainvar.h    |   9 +
 model/rateheterogeneity.h |  16 ++
 model/rateinvar.cpp       |   6 +-
 model/ratekategory.cpp    |   1 +
 msetsblock.cpp            |   1 +
 pda.cpp                   |   2 +-
 phyloanalysis.cpp         |  34 +--
 phylosupertreeplen.cpp    |   7 +
 phylosupertreeplen.h      |   9 +
 phylotesting.cpp          |  41 +++-
 phylotree.cpp             |   5 +-
 phylotree.h               |  34 +--
 quartet.cpp               | 611 +++++++++++++++++++++++++---------------------
 superalignment.cpp        |  20 +-
 superalignment.h          |   4 +-
 tools.cpp                 |  52 +++-
 tools.h                   |  23 +-
 32 files changed, 811 insertions(+), 406 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3eea0ea..22a4864 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -45,7 +45,7 @@ add_definitions(-DIQ_TREE)
 # The version number.
 set (iqtree_VERSION_MAJOR 1)
 set (iqtree_VERSION_MINOR 4)
-set (iqtree_VERSION_PATCH "0") 
+set (iqtree_VERSION_PATCH "2") 
 
 set(BUILD_SHARED_LIBS OFF)
 
diff --git a/alignment.cpp b/alignment.cpp
index bf6d195..4a20602 100644
--- a/alignment.cpp
+++ b/alignment.cpp
@@ -1889,7 +1889,7 @@ void Alignment::printFasta(const char *file_name, bool append, const char *aln_s
 }
 
 
-void Alignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_true_char) {
+void Alignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_true_char, int min_taxa, IntVector *kept_partitions) {
     IntVector::iterator it;
     for (it = seq_id.begin(); it != seq_id.end(); it++) {
         assert(*it >= 0 && *it < aln->getNSeq());
@@ -1930,6 +1930,8 @@ void Alignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_t
     countConstSite();
     buildSeqStates();
     assert(size() <= aln->size());
+    if (kept_partitions)
+        kept_partitions->push_back(0);
 }
 
 
diff --git a/alignment.h b/alignment.h
index a9dba09..1b736c9 100644
--- a/alignment.h
+++ b/alignment.h
@@ -352,8 +352,10 @@ public:
             @param aln original input alignment
             @param seq_id ID of sequences to extract from
             @param min_true_cher the minimum number of non-gap characters, true_char<min_true_char -> delete the sequence
+            @param min_taxa only keep alignment that has >= min_taxa sequences
+            @param[out] kept_partitions (for SuperAlignment) indices of kept partitions
      */
-    virtual void extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_true_char);
+    virtual void extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_true_char, int min_taxa = 0, IntVector *kept_partitions = NULL);
 
     /**
             extract a sub-set of patterns
diff --git a/iqtree.cpp b/iqtree.cpp
index e38a1ed..2179c96 100644
--- a/iqtree.cpp
+++ b/iqtree.cpp
@@ -773,54 +773,54 @@ void IQTree::initCandidateTreeSet(int nParTrees, int nNNITrees) {
 }
 
 void IQTree::initializePLL(Params &params) {
-  pllAttr.rateHetModel = PLL_GAMMA;
-  pllAttr.fastScaling = PLL_FALSE;
-  pllAttr.saveMemory = PLL_FALSE;
-  pllAttr.useRecom = PLL_FALSE;
-  pllAttr.randomNumberSeed = params.ran_seed;
-  pllAttr.numberOfThreads = params.num_threads; /* This only affects the pthreads version */
-  if (pllInst != NULL) {
-    pllDestroyInstance(pllInst);
-  }
-  /* Create a PLL instance */
-  pllInst = pllCreateInstance(&pllAttr);
-
-  /* Read in the alignment file */
-  stringstream pllAln;
-  if (aln->isSuperAlignment()) {
-    ((SuperAlignment*) aln)->printCombinedAlignment(pllAln);
-  } else {
-    aln->printPhylip(pllAln);
-  }
-  string pllAlnStr = pllAln.str();
-  pllAlignment = pllParsePHYLIPString(pllAlnStr.c_str(), pllAlnStr.length());
-
-  /* Read in the partition information */
-  // BQM: to avoid printing file
-  stringstream pllPartitionFileHandle;
-  createPLLPartition(params, pllPartitionFileHandle);
-  pllQueue *partitionInfo = pllPartitionParseString(pllPartitionFileHandle.str().c_str());
-
-  /* Validate the partitions */
-  if (!pllPartitionsValidate(partitionInfo, pllAlignment)) {
-    outError("pllPartitionsValidate");
-  }
-
-  /* Commit the partitions and build a partitions structure */
-  pllPartitions = pllPartitionsCommit(partitionInfo, pllAlignment);
-
-  /* We don't need the the intermediate partition queue structure anymore */
-  pllQueuePartitionsDestroy(&partitionInfo);
-
-  /* eliminate duplicate sites from the alignment and update weights vector */
-  pllAlignmentRemoveDups(pllAlignment, pllPartitions);
-
-  pllTreeInitTopologyForAlignment(pllInst, pllAlignment);
-
-  /* Connect the alignment and partition structure with the tree structure */
-  if (!pllLoadAlignment(pllInst, pllAlignment, pllPartitions)) {
-    outError("Incompatible tree/alignment combination");
-  }
+    pllAttr.rateHetModel = PLL_GAMMA;
+    pllAttr.fastScaling = PLL_FALSE;
+    pllAttr.saveMemory = PLL_FALSE;
+    pllAttr.useRecom = PLL_FALSE;
+    pllAttr.randomNumberSeed = params.ran_seed;
+    pllAttr.numberOfThreads = params.num_threads; /* This only affects the pthreads version */
+    if (pllInst != NULL) {
+        pllDestroyInstance(pllInst);
+    }
+    /* Create a PLL instance */
+    pllInst = pllCreateInstance(&pllAttr);
+
+    /* Read in the alignment file */
+    stringstream pllAln;
+    if (aln->isSuperAlignment()) {
+        ((SuperAlignment *) aln)->printCombinedAlignment(pllAln);
+    } else {
+        aln->printPhylip(pllAln);
+    }
+    string pllAlnStr = pllAln.str();
+    pllAlignment = pllParsePHYLIPString(pllAlnStr.c_str(), pllAlnStr.length());
+
+    /* Read in the partition information */
+    // BQM: to avoid printing file
+    stringstream pllPartitionFileHandle;
+    createPLLPartition(params, pllPartitionFileHandle);
+    pllQueue *partitionInfo = pllPartitionParseString(pllPartitionFileHandle.str().c_str());
+
+    /* Validate the partitions */
+    if (!pllPartitionsValidate(partitionInfo, pllAlignment)) {
+        outError("pllPartitionsValidate");
+    }
+
+    /* Commit the partitions and build a partitions structure */
+    pllPartitions = pllPartitionsCommit(partitionInfo, pllAlignment);
+
+    /* We don't need the the intermediate partition queue structure anymore */
+    pllQueuePartitionsDestroy(&partitionInfo);
+
+    /* eliminate duplicate sites from the alignment and update weights vector */
+    pllAlignmentRemoveDups(pllAlignment, pllPartitions);
+
+    pllTreeInitTopologyForAlignment(pllInst, pllAlignment);
+
+    /* Connect the alignment and partition structure with the tree structure */
+    if (!pllLoadAlignment(pllInst, pllAlignment, pllPartitions)) {
+        outError("Incompatible tree/alignment combination");
+    }
 }
 
 
@@ -1424,7 +1424,7 @@ double IQTree::getAlphaFromPLL() {
 
 void IQTree::inputModelPLL2IQTree() {
     // TODO add support for partitioned model
-    dynamic_cast<RateGamma*>(getRate())->setGammaShape(pllPartitions->partitionData[0]->alpha);
+    getRate()->setGammaShape(pllPartitions->partitionData[0]->alpha);
     if (aln->num_states == 4) {
         ((ModelGTR*) getModel())->setRateMatrix(pllPartitions->partitionData[0]->substRates);
         getModel()->decomposeRateMatrix();
@@ -1720,6 +1720,8 @@ extern pllUFBootData * pllUFBootDataPtr;
 string IQTree::optimizeModelParameters(bool printInfo, double logl_epsilon) {
 	if (logl_epsilon == -1)
 		logl_epsilon = params->modeps;
+//    if (params->test_param)
+//        logl_epsilon = 1.0;
     cout << "Estimate model parameters (epsilon = " << logl_epsilon << ")" << endl;
 	double stime = getRealTime();
 	string newTree;
@@ -1743,8 +1745,18 @@ string IQTree::optimizeModelParameters(bool printInfo, double logl_epsilon) {
         if (printInfo)
             cout << etime - stime << " seconds (logl: " << curScore << ")" << endl;
 	} else {
-		double modOptScore =
-                getModelFactory()->optimizeParameters(params->fixed_branch_length, printInfo, logl_epsilon);
+        double modOptScore;
+        if (params->test_param) { // DO RESTART ON ALPHA AND P_INVAR
+            double stime = getRealTime();
+            modOptScore = getModelFactory()->optimizeParametersGammaInvar(params->fixed_branch_length, printInfo, logl_epsilon);
+            double etime = getRealTime();
+            cout << "Testing param took: " << etime -stime << " CPU seconds" << endl;
+            cout << endl;
+            params->test_param = false;
+        } else {
+            modOptScore = getModelFactory()->optimizeParameters(params->fixed_branch_length, printInfo, logl_epsilon);
+        }
+
 		if (isSuperTree()) {
 			((PhyloSuperTree*) this)->computeBranchLengths();
 		}
diff --git a/model/modelfactory.cpp b/model/modelfactory.cpp
index fc4de9a..bb7efbd 100644
--- a/model/modelfactory.cpp
+++ b/model/modelfactory.cpp
@@ -693,22 +693,20 @@ bool ModelFactory::readSiteFreq(Alignment *aln, char* site_freq_file, IntVector
 double ModelFactory::initGTRGammaIParameters(RateHeterogeneity *rate, ModelSubst *model, double initAlpha,
                                            double initPInvar, double *initRates, double *initStateFreqs)  {
 
-    RateGammaInvar* rateGammaInvar = dynamic_cast<RateGammaInvar*>(rate);
-    ModelGTR* modelGTR = dynamic_cast<ModelGTR*>(model);
+    RateHeterogeneity* rateGammaInvar = rate;
+    ModelGTR* modelGTR = (ModelGTR*)(model);
     modelGTR->setRateMatrix(initRates);
     modelGTR->setStateFrequency(initStateFreqs);
     rateGammaInvar->setGammaShape(initAlpha);
     rateGammaInvar->setPInvar(initPInvar);
     modelGTR->decomposeRateMatrix();
-    rateGammaInvar->computeRates();
     site_rate->phylo_tree->clearAllPartialLH();
     return site_rate->phylo_tree->computeLikelihood();
 }
 
 double ModelFactory::optimizeParametersOnly(double gradient_epsilon) {
     double logl;
-    if (Params::getInstance().fai && dynamic_cast<RateGammaInvar*>(site_rate) != NULL
-        && dynamic_cast<ModelGTR*>(model) != NULL) {
+    if (Params::getInstance().fai && site_rate != NULL && model != NULL) {
         cout << "Optimize substitutional and site rates with restart ..." << endl;
         PhyloTree* tree = site_rate->phylo_tree;
         double initAlpha = 0.1;
@@ -740,8 +738,8 @@ double ModelFactory::optimizeParametersOnly(double gradient_epsilon) {
                 site_rate->optimizeParameters(gradient_epsilon);
                 logl = tree->optimizeAllBranches(1);
             }
-            RateGammaInvar* rateGammaInvar = dynamic_cast<RateGammaInvar*>(site_rate);
-            ModelGTR* modelGTR = dynamic_cast<ModelGTR*>(model);
+            RateHeterogeneity* rateGammaInvar = site_rate;
+            ModelGTR* modelGTR = (ModelGTR*)(model);
             double curAlpha = rateGammaInvar->getGammaShape();
             double curPInvar = rateGammaInvar->getPInvar();
             if (logl > bestLogl) {
@@ -821,6 +819,8 @@ double ModelFactory::optimizeAllParameters(double gradient_epsilon) {
     model->decomposeRateMatrix();
     site_rate->phylo_tree->clearAllPartialLH();
 
+    score = site_rate->phylo_tree->computeLikelihood();
+
     delete [] bound_check;
     delete [] lower_bound;
     delete [] upper_bound;
@@ -829,6 +829,112 @@ double ModelFactory::optimizeAllParameters(double gradient_epsilon) {
     return score;
 }
 
+double ModelFactory::optimizeParametersGammaInvar(bool fixed_len, bool write_info, double logl_epsilon, double gradient_epsilon) {
+	PhyloTree *tree = site_rate->getTree();
+
+	RateHeterogeneity* site_rates = tree->getRate();
+	if (site_rates == NULL) {
+//		outError("The model must be +I+G");
+        // model is not +I+G, call conventional function instead
+		return optimizeParameters(fixed_len, write_info, logl_epsilon, gradient_epsilon);
+	}
+
+	double frac_const = tree->aln->frac_const_sites;
+	if (fixed_len) {
+		tree->setCurScore(tree->computeLikelihood());
+	} else {
+		tree->optimizeAllBranches(1);
+	}
+
+
+	/* Back up branch lengths and substitutional rates */
+	DoubleVector lenvec;
+	DoubleVector bestLens;
+	tree->saveBranchLengths(lenvec);
+	int numRateEntries = tree->getModel()->getNumRateEntries();
+	double *rates = new double[numRateEntries];
+	double *bestRates = new double[numRateEntries];
+	tree->getModel()->getRateMatrix(rates);
+	int numStates = tree->aln->num_states;
+	double *state_freqs = new double[numStates];
+	tree->getModel()->getStateFrequency(state_freqs);
+
+	/* Best estimates found */
+	double *bestStateFreqs =  new double[numStates];
+	double bestLogl = tree->getCurScore();
+	double bestAlpha = 0.0;
+	double bestPInvar = 0.0;
+
+	double testInterval = (frac_const - MIN_PINVAR*2) / 10;
+	double initPInv = MIN_PINVAR;
+	double initAlpha = site_rates->getGammaShape();
+
+    if (write_info)
+        cout << "testInterval: " << testInterval << endl;
+
+	// Now perform testing different inital p_inv values
+	while (initPInv <= frac_const) {
+        if (write_info) {
+            cout << endl;
+            cout << "Testing with init. pinv = " << initPInv << " / init. alpha = "  << initAlpha << endl;
+        }
+		tree->restoreBranchLengths(lenvec);
+		((ModelGTR*) tree->getModel())->setRateMatrix(rates);
+		((ModelGTR*) tree->getModel())->setStateFrequency(state_freqs);
+		tree->getModel()->decomposeRateMatrix();
+		site_rates->setPInvar(initPInv);
+		site_rates->setGammaShape(initAlpha);
+		tree->clearAllPartialLH();
+		optimizeParameters(fixed_len, write_info, logl_epsilon, gradient_epsilon);
+		double estAlpha = tree->getRate()->getGammaShape();
+		double estPInv = tree->getRate()->getPInvar();
+		double logl = tree->getCurScore();
+        if (write_info) {
+            cout << "Est. alpha: " << estAlpha << " / Est. pinv: " << estPInv
+            << " / Logl: " << logl << endl;
+        }
+		initPInv = initPInv + testInterval;
+
+		if (tree->getCurScore() > bestLogl) {
+			bestLogl = logl;
+			bestAlpha = estAlpha;
+			bestPInvar = estPInv;
+			bestLens.clear();
+			tree->saveBranchLengths(bestLens);
+			tree->getModel()->getRateMatrix(bestRates);
+			tree->getModel()->getStateFrequency(bestStateFreqs);
+		}
+	}
+
+	site_rates->setGammaShape(bestAlpha);
+//	site_rates->setFixGammaShape(false);
+	site_rates->setPInvar(bestPInvar);
+//	site_rates->setFixPInvar(false);
+	((ModelGTR*) tree->getModel())->setRateMatrix(bestRates);
+	((ModelGTR*) tree->getModel())->setStateFrequency(bestStateFreqs);
+	tree->restoreBranchLengths(bestLens);
+	tree->getModel()->decomposeRateMatrix();
+	tree->clearAllPartialLH();
+	tree->setCurScore(tree->computeLikelihood());
+    if (write_info) {    
+        cout << endl;
+        cout << "Best initial alpha: " << bestAlpha << " / initial pinv: " << bestPInvar << " / ";
+        cout << "Logl: " << tree->getCurScore() << endl;
+    }
+
+	delete [] rates;
+	delete [] state_freqs;
+	delete [] bestRates;
+	delete [] bestStateFreqs;
+    
+    // updating global variable is not safe!
+//	Params::getInstance().testAlpha = false;
+    
+    // 2016-03-14: this was missing!
+    return tree->getCurScore();
+}
+
+
 double ModelFactory::optimizeParameters(bool fixed_len, bool write_info,
                                         double logl_epsilon, double gradient_epsilon) {
 	assert(model);
@@ -950,13 +1056,14 @@ double ModelFactory::optimizeParameters(bool fixed_len, bool write_info,
 	}
 	double elapsed_secs = getRealTime() - begin_time;
 	if (write_info)
-		cout << "Parameters optimization took " << i-1 << " rounds (" << elapsed_secs << " sec)" << endl << endl;
+		cout << "Parameters optimization took " << i-1 << " rounds (" << elapsed_secs << " sec)" << endl;
 	startStoringTransMatrix();
 
 	// For UpperBounds -----------
 	tree->mlCheck = 1;
 	// ---------------------------
 
+	tree->setCurScore(cur_lh);
 	return cur_lh;
 }
 
@@ -1105,3 +1212,4 @@ bool ModelFactory::getVariables(double *variables) {
     return changed;
 }
 
+
diff --git a/model/modelfactory.h b/model/modelfactory.h
index a4334d5..baedb6e 100644
--- a/model/modelfactory.h
+++ b/model/modelfactory.h
@@ -169,6 +169,14 @@ public:
                                       double logl_epsilon = 0.1, double gradient_epsilon = 0.001);
 
 	/**
+	 *  optimize model parameters and tree branch lengths for the +I+G model
+	 *  using restart strategy.
+	 * 	@param fixed_len TRUE to fix branch lengths, default is false
+	 *	@return the best likelihood
+	 */
+	virtual double optimizeParametersGammaInvar(bool fixed_len = false, bool write_info = true, double logl_epsilon = 0.1, double gradient_epsilon = 0.001);
+
+	/**
 	 * @return TRUE if parameters are at the boundary that may cause numerical unstability
 	 */
 	virtual bool isUnstableParameters();
diff --git a/model/modelgtr.cpp b/model/modelgtr.cpp
index 39e18d8..557a291 100644
--- a/model/modelgtr.cpp
+++ b/model/modelgtr.cpp
@@ -592,6 +592,7 @@ double ModelGTR::optimizeParameters(double gradient_epsilon) {
     if (changed) {
         decomposeRateMatrix();
         phylo_tree->clearAllPartialLH();
+        score = phylo_tree->computeLikelihood();
     }
 	
 	delete [] bound_check;
diff --git a/model/partitionmodel.cpp b/model/partitionmodel.cpp
index cb853c1..bddbecd 100644
--- a/model/partitionmodel.cpp
+++ b/model/partitionmodel.cpp
@@ -175,6 +175,39 @@ double PartitionModel::optimizeParameters(bool fixed_len, bool write_info, doubl
     return tree_lh;
 }
 
+
+double PartitionModel::optimizeParametersGammaInvar(bool fixed_len, bool write_info, double logl_epsilon, double gradient_epsilon) {
+    PhyloSuperTree *tree = (PhyloSuperTree*)site_rate->getTree();
+    double tree_lh = 0.0;
+    int ntrees = tree->size();
+
+    if (tree->part_order.empty()) tree->computePartitionOrder();
+	#ifdef _OPENMP
+	#pragma omp parallel for reduction(+: tree_lh) schedule(dynamic) if(ntrees >= tree->params->num_threads)
+	#endif
+    for (int i = 0; i < ntrees; i++) {
+        int part = tree->part_order[i];
+    	if (write_info)
+        #ifdef _OPENMP
+        #pragma omp critical
+        #endif
+        {
+    		cout << "Optimizing " << tree->at(part)->getModelName() <<
+        		" parameters for partition " << tree->part_info[part].name <<
+        		" (" << tree->at(part)->getModelFactory()->getNParameters() << " free parameters)" << endl;
+        }
+        tree_lh += tree->at(part)->getModelFactory()->optimizeParametersGammaInvar(fixed_len, write_info && verbose_mode >= VB_MED, 
+            logl_epsilon/min(ntrees,10), gradient_epsilon/min(ntrees,10));
+    }
+    //return ModelFactory::optimizeParameters(fixed_len, write_info);
+
+    if (tree->params->link_alpha) {
+        tree_lh = optimizeLinkedAlpha(write_info, gradient_epsilon);
+    }
+    return tree_lh;
+}
+
+
 PartitionModel::~PartitionModel()
 {
 }
diff --git a/model/partitionmodel.h b/model/partitionmodel.h
index 8fe7fda..2a34473 100644
--- a/model/partitionmodel.h
+++ b/model/partitionmodel.h
@@ -73,6 +73,14 @@ public:
 	virtual double optimizeParameters(bool fixed_len = false, bool write_info = true, double logl_epsilon = 0.1, double gradient_epsilon = 0.001);
 
 	/**
+	 *  optimize model parameters and tree branch lengths for the +I+G model
+	 *  using restart strategy.
+	 * 	@param fixed_len TRUE to fix branch lengths, default is false
+	 *	@return the best likelihood
+	 */
+	virtual double optimizeParametersGammaInvar(bool fixed_len = false, bool write_info = true, double logl_epsilon = 0.1, double gradient_epsilon = 0.001);
+
+	/**
 	 * @return TRUE if parameters are at the boundary that may cause numerical unstability
 	 */
 	virtual bool isUnstableParameters();
diff --git a/model/ratefree.cpp b/model/ratefree.cpp
index ba5bcb6..3265a2b 100644
--- a/model/ratefree.cpp
+++ b/model/ratefree.cpp
@@ -248,6 +248,7 @@ double RateFree::optimizeParameters(double gradient_epsilon) {
         if (sorted_rates)
             quicksort(rates, 0, ncategory-1, prop);
         phylo_tree->clearAllPartialLH();
+        score = phylo_tree->computeLikelihood();
     }
     optimizing_params = 0;
 
diff --git a/model/ratefree.h b/model/ratefree.h
index 327012b..062985a 100644
--- a/model/ratefree.h
+++ b/model/ratefree.h
@@ -10,7 +10,7 @@
 
 #include "rategamma.h"
 
-class RateFree: virtual public RateGamma {
+class RateFree: public RateGamma {
 public:
 	/**
 		constructor
diff --git a/model/ratefreeinvar.cpp b/model/ratefreeinvar.cpp
index b3b8c01..0ec731c 100644
--- a/model/ratefreeinvar.cpp
+++ b/model/ratefreeinvar.cpp
@@ -8,7 +8,7 @@
 #include "ratefreeinvar.h"
 
 RateFreeInvar::RateFreeInvar(int ncat, double start_alpha, string params, bool sorted_rates, double p_invar_sites, string opt_alg, PhyloTree *tree)
-: RateGamma(ncat, 1.0, false, tree), RateInvar(p_invar_sites, tree), RateFree(ncat, start_alpha, params, sorted_rates, opt_alg, tree)
+: RateInvar(p_invar_sites, tree), RateFree(ncat, start_alpha, params, sorted_rates, opt_alg, tree)
 {
 	cur_optimize = 0;
 	name = "+I" + name;
diff --git a/model/rategamma.cpp b/model/rategamma.cpp
index 6957c3f..48e0814 100644
--- a/model/rategamma.cpp
+++ b/model/rategamma.cpp
@@ -150,6 +150,7 @@ void RateGamma::computeRatesMean () {
 
 void RateGamma::setGammaShape(double gs) {
 	gamma_shape = gs;
+    computeRates();
 }
 
 double RateGamma::computeFunction(double shape) {
diff --git a/model/rategamma.h b/model/rategamma.h
index 8be01b6..19fc809 100644
--- a/model/rategamma.h
+++ b/model/rategamma.h
@@ -186,7 +186,7 @@ public:
 		return fix_gamma_shape;
 	}
 
-	void setFixGammaShape(bool fixGammaShape) {
+	virtual void setFixGammaShape(bool fixGammaShape) {
 		fix_gamma_shape = fixGammaShape;
 	}
 
diff --git a/model/rategammainvar.cpp b/model/rategammainvar.cpp
index dc293bd..2b0641d 100644
--- a/model/rategammainvar.cpp
+++ b/model/rategammainvar.cpp
@@ -60,7 +60,7 @@ string RateGammaInvar::getNameParams() {
 double RateGammaInvar::computeFunction(double value) {
 	if (cur_optimize == 0)
 		gamma_shape = value;
-	else 
+	else
 		p_invar = value;
 	// need to compute rates again if p_inv or Gamma shape changes!
 	computeRates();
@@ -114,8 +114,9 @@ double RateGammaInvar::optimizeParameters(double gradient_epsilon) {
 	if (ndim == 0)
 		return phylo_tree->computeLikelihood();
 
+
 	if (!joint_optimize) {
-//		double lh = phylo_tree->computeLikelihood();
+		double lh = phylo_tree->computeLikelihood();
 		cur_optimize = 0;
 		double gamma_lh;
 		if (Params::getInstance().testAlpha) {
@@ -123,19 +124,41 @@ double RateGammaInvar::optimizeParameters(double gradient_epsilon) {
 		} else {
             gamma_lh = RateGamma::optimizeParameters(gradient_epsilon);
         }
+		assert(gamma_lh >= lh-0.1);
 		cur_optimize = 1;
 		double invar_lh = -DBL_MAX;
         invar_lh = RateInvar::optimizeParameters(gradient_epsilon);
-//		assert(tree_lh >= lh-0.1);
-//		lh = tree_lh;
+		assert(invar_lh >= gamma_lh-0.1);
+		//lh = tree_lh;
 
-//		assert(gamma_lh >= invar_lh - 0.1);
-		phylo_tree->clearAllPartialLH();
+		//assert(gamma_lh >= invar_lh - 0.1);
+//		phylo_tree->clearAllPartialLH();
 //		return gamma_lh;
         cur_optimize = 0;
         return invar_lh;
 	}
 
+/*
+	if (!joint_optimize) {
+//		double lh = phylo_tree->computeLikelihood();
+		cur_optimize = 1;
+		double invar_lh = -DBL_MAX;
+		invar_lh = RateInvar::optimizeParameters(gradient_epsilon);
+//		assert(tree_lh >= lh-0.1);
+//		lh = tree_lh;
+		cur_optimize = 0;
+		double gamma_lh;
+		if (Params::getInstance().testAlpha) {
+			gamma_lh = RateGamma::optimizeParameters(gradient_epsilon, 0.05, 10);
+		} else {
+			gamma_lh = RateGamma::optimizeParameters(gradient_epsilon);
+		}
+		//assert(gamma_lh >= invar_lh - 0.1);
+		phylo_tree->clearAllPartialLH();
+		return gamma_lh;
+	}
+*/
+
 	if (verbose_mode >= VB_MAX)
 		cout << "Optimizing " << name << " model parameters by BFGS..." << endl;
 
@@ -156,6 +179,7 @@ double RateGammaInvar::optimizeParameters(double gradient_epsilon) {
 	getVariables(variables);
 
 	phylo_tree->clearAllPartialLH();
+    score = phylo_tree->computeLikelihood();
 
 	delete [] bound_check;
 	delete [] lower_bound;
diff --git a/model/rategammainvar.h b/model/rategammainvar.h
index c4092c8..6d7926c 100644
--- a/model/rategammainvar.h
+++ b/model/rategammainvar.h
@@ -64,6 +64,15 @@ public:
 	virtual double getRate(int category) { return RateGamma::getRate(category); }
 
 	/**
+		set the proportion of invariable sites. Default: do nothing
+		@param pinv the proportion of invariable sites
+	*/
+	virtual void setPInvar(double pInvar) {
+		p_invar = pInvar;
+        computeRates();
+	}
+
+	/**
 	 * @return model name with parameters in form of e.g. GTR{a,b,c,d,e,f}
 	 */
 	virtual string getNameParams();
diff --git a/model/rateheterogeneity.h b/model/rateheterogeneity.h
index 85e14c6..51bab81 100644
--- a/model/rateheterogeneity.h
+++ b/model/rateheterogeneity.h
@@ -146,6 +146,11 @@ public:
 	*/
 	virtual void setPInvar(double pinv) { }
 
+    /**
+        set whether to fix p_invar
+    */
+	virtual void setFixPInvar(bool fixPInvar) {}
+
 	/**
 		Set whether or not to optimize p_invar
 		@param opt TRUE to optimize p_invar, FALSE otherwise
@@ -159,6 +164,17 @@ public:
 	virtual double getGammaShape() { return 0.0; }
 
 	/**
+		set the Gamma shape. Default: nothing
+		@param gs Gamma shape
+	*/	
+	virtual void setGammaShape(double gs) {}
+
+    /**
+        set whether to fix gamma shape
+    */
+	virtual void setFixGammaShape(bool fixGammaShape) {}
+
+	/**
 		@return >0 if this is a Gamma model (default: 0)
 	*/	
     virtual int isGammaRate() { return 0; }
diff --git a/model/rateinvar.cpp b/model/rateinvar.cpp
index 58b7d52..55988c5 100644
--- a/model/rateinvar.cpp
+++ b/model/rateinvar.cpp
@@ -93,10 +93,10 @@ double RateInvar::optimizeParameters(double gradient_epsilon) {
 	double ferror;
 	p_invar = minimizeOneDimen(MIN_PINVAR, p_invar, min(phylo_tree->aln->frac_const_sites, 1.0-MIN_PINVAR), max(gradient_epsilon, TOL_PINVAR), &negative_lh, &ferror);
 	//p_invar = minimizeOneDimen(MIN_PINVAR, p_invar, 1.0 - MIN_PINVAR, TOL_PINVAR, &negative_lh, &ferror);
-    phylo_tree->clearAllPartialLH();
-	phylo_tree->computePtnInvar();
+//    phylo_tree->clearAllPartialLH();
+//	phylo_tree->computePtnInvar();
 //	return -negative_lh;
-    return phylo_tree->computeLikelihood();
+    return -computeFunction(p_invar);
 }
 
 void RateInvar::writeInfo(ostream &out) {
diff --git a/model/ratekategory.cpp b/model/ratekategory.cpp
index fe20122..64ceb09 100644
--- a/model/ratekategory.cpp
+++ b/model/ratekategory.cpp
@@ -86,6 +86,7 @@ double RateKategory::optimizeParameters(double gradient_epsilon)
 	getVariables(variables);
 	//sort(rates, rates+ncategory);
 	phylo_tree->clearAllPartialLH();
+    score = phylo_tree->computeLikelihood();
 	
 	delete [] bound_check;
 	delete [] lower_bound;
diff --git a/msetsblock.cpp b/msetsblock.cpp
index 839467e..d4309f1 100644
--- a/msetsblock.cpp
+++ b/msetsblock.cpp
@@ -227,6 +227,7 @@ void MSetsBlock::Read(NxsToken &token)
 					errormsg += " instead";
 					throw NxsException(errormsg, token.GetFilePosition(), token.GetFileLine(), token.GetFileColumn());
 				}
+                token.SetLabileFlagBit(NxsToken::preserveUnderscores);
 				if (token.Equals(";"))
 					break;
 				else
diff --git a/pda.cpp b/pda.cpp
index 9a17049..9f9db7d 100644
--- a/pda.cpp
+++ b/pda.cpp
@@ -2189,7 +2189,7 @@ int main(int argc, char *argv[])
     
     bool append_log = false;
     
-    if (!Params::getInstance().ignore_checkpoint) {
+    if (!Params::getInstance().ignore_checkpoint && fileExists(filename)) {
         checkpoint->load();
         if (checkpoint->hasKey("finished")) {
             if (checkpoint->getBool("finished")) {
diff --git a/phyloanalysis.cpp b/phyloanalysis.cpp
index d321ac3..ebfffcf 100644
--- a/phyloanalysis.cpp
+++ b/phyloanalysis.cpp
@@ -681,7 +681,7 @@ void reportPhyloAnalysis(Params &params, string &original_model,
 			reportRate(out, tree);
 		}
 
-    		if (params.lmap_num_quartets) {
+    		if (params.lmap_num_quartets >= 0) {
 			tree.reportLikelihoodMapping(out);
 		}
 
@@ -1080,7 +1080,7 @@ void reportPhyloAnalysis(Params &params, string &original_model,
 		cout << "  Site log-likelihoods:          " << params.out_prefix << ".sitelh" << endl;
 		}
 	}
-    	if (params.lmap_num_quartets) {
+    	if (params.lmap_num_quartets >= 0) {
 		cout << "  Likelihood mapping plot (SVG): " << params.out_prefix << ".lmap.svg" << endl;
 		cout << "  Likelihood mapping plot (EPS): " << params.out_prefix << ".lmap.eps" << endl;
 	}
@@ -1727,6 +1727,9 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
 
     iqtree.initializeAllPartialLh();
 	double initEpsilon = params.min_iterations == 0 ? params.modeps : (params.modeps*10);
+	if (params.test_param)
+		initEpsilon = 0.1;
+
 	string initTree;
 
 	if (iqtree.getRate()->name.find("+I+G") != string::npos) {
@@ -1747,6 +1750,7 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
             cout << "Testing alpha took: " << etime -stime << " CPU seconds" << endl;
             cout << endl;
 		}
+
 	}
 
     // Optimize model parameters and branch lengths using ML for the initial tree
@@ -1767,11 +1771,16 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
         iqtree.getCheckpoint()->dump();
     }
 
-    if (params.lmap_num_quartets) {
-        cout << "Performing likelihood mapping with " << params.lmap_num_quartets << " quartets..." << endl;
+    if (params.lmap_num_quartets >= 0) {
+        cout << endl << "Performing likelihood mapping with ";
+        if (params.lmap_num_quartets > 0)
+            cout << params.lmap_num_quartets;
+        else
+            cout << "all";
+        cout << " quartets..." << endl;
         double lkmap_time = getRealTime();
         iqtree.doLikelihoodMapping();
-        cout << getRealTime()-lkmap_time << " seconds" << endl;
+        cout << "Likelihood mapping needed " << getRealTime()-lkmap_time << " seconds" << endl << endl;
     }
     
     bool finishedCandidateSet = iqtree.getCheckpoint()->getBool("finishedCandidateSet");
@@ -1942,10 +1951,7 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
         } else {
             cout << "Performs final model parameters optimization" << endl;
             string tree;
-            if (params.testAlpha)
-                tree = iqtree.optimizeModelParameters(true, 0.001);
-            else
-                tree = iqtree.optimizeModelParameters(true);
+            tree = iqtree.optimizeModelParameters(true);
             iqtree.candidateTrees.update(tree, iqtree.getCurScore(), true);
             iqtree.getCheckpoint()->putBool("finishedModelFinal", true);
             iqtree.saveCheckpoint();
@@ -2038,7 +2044,7 @@ void runTreeReconstruction(Params &params, string &original_model, IQTree &iqtre
 }
 
 void computeLoglFromUserInputGAMMAInvar(Params &params, IQTree &iqtree) {
-	RateGammaInvar *site_rates = dynamic_cast<RateGammaInvar *>(iqtree.getRate());
+	RateHeterogeneity *site_rates = iqtree.getRate();
 	site_rates->setFixPInvar(true);
 	site_rates->setFixGammaShape(true);
 	vector<double> alphas, p_invars, logl;
@@ -2070,7 +2076,6 @@ void computeLoglFromUserInputGAMMAInvar(Params &params, IQTree &iqtree) {
 		aiFileResults << alphas.at(i) << " " << p_invars.at(i) << " ";
 		site_rates->setGammaShape(alphas.at(i));
 		site_rates->setPInvar(p_invars.at(i));
-		site_rates->computeRates();
 		iqtree.clearAllPartialLH();
 		double lh = iqtree.getModelFactory()->optimizeParameters(params.fixed_branch_length, false, 0.001);
 		aiFileResults << lh << " " << iqtree.treeLength() << "\n";
@@ -2086,7 +2091,7 @@ void searchGAMMAInvarByRestarting(IQTree &iqtree) {
 		iqtree.setCurScore(iqtree.optimizeAllBranches(1));
 	else
 		iqtree.setCurScore(iqtree.computeLikelihood());
-	RateGammaInvar* site_rates = dynamic_cast<RateGammaInvar*>(iqtree.getRate());
+	RateHeterogeneity* site_rates = (iqtree.getRate());
 	double values[] = { 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0 };
 	vector<double> initAlphas;
 	if (Params::getInstance().randomAlpha) {
@@ -2125,7 +2130,6 @@ void searchGAMMAInvarByRestarting(IQTree &iqtree) {
         iqtree.getModel()->decomposeRateMatrix();
         site_rates->setGammaShape(initAlphas[i]);
 		site_rates->setPInvar(initPInvar);
-		site_rates->computeRates();
 		iqtree.clearAllPartialLH();
 		iqtree.optimizeModelParameters(verbose_mode >= VB_MED, Params::getInstance().testAlphaEps);
         double estAlpha = iqtree.getRate()->getGammaShape();
@@ -2152,7 +2156,6 @@ void searchGAMMAInvarByRestarting(IQTree &iqtree) {
     ((ModelGTR*) iqtree.getModel())->setStateFrequency(bestStateFreqs);
 	iqtree.restoreBranchLengths(bestLens);
     iqtree.getModel()->decomposeRateMatrix();
-    site_rates->computeRates();
 	iqtree.clearAllPartialLH();
     iqtree.setCurScore(iqtree.computeLikelihood());
     cout << endl;
@@ -2183,7 +2186,7 @@ void exhaustiveSearchGAMMAInvar(Params &params, IQTree &iqtree) {
 	DoubleVector lenvec;
 	iqtree.saveBranchLengths(lenvec);
 
-	RateGammaInvar* site_rates = dynamic_cast<RateGammaInvar*>(iqtree.getRate());
+	RateHeterogeneity* site_rates = (iqtree.getRate());
 	site_rates->setFixPInvar(true);
 	site_rates->setFixGammaShape(true);
 
@@ -2191,7 +2194,6 @@ void exhaustiveSearchGAMMAInvar(Params &params, IQTree &iqtree) {
         for (double p_invar = p_invarMin; p_invar < p_invarMax; p_invar = p_invar + stepSize) {
             site_rates->setGammaShape(alpha);
             site_rates->setPInvar(p_invar);
-            site_rates->computeRates();
             iqtree.clearAllPartialLH();
             double lh = iqtree.getModelFactory()->optimizeParameters(params.fixed_branch_length, false, 0.001);
             stringstream ss;
diff --git a/phylosupertreeplen.cpp b/phylosupertreeplen.cpp
index 6782e9d..bc93d40 100644
--- a/phylosupertreeplen.cpp
+++ b/phylosupertreeplen.cpp
@@ -137,6 +137,8 @@ double PartitionModelPlen::optimizeParameters(bool fixed_len, bool write_info, d
             if (tree->part_info[part].cur_score == 0.0)
                 tree->part_info[part].cur_score = tree->at(part)->computeLikelihood();
         	cur_lh += tree->part_info[part].cur_score;
+            
+            
 
         	// normalize rates s.t. branch lengths are #subst per site
         	double mean_rate = tree->at(part)->getRate()->rescaleRates();
@@ -197,6 +199,11 @@ double PartitionModelPlen::optimizeParameters(bool fixed_len, bool write_info, d
     return tree_lh;
 }
 
+double PartitionModelPlen::optimizeParametersGammaInvar(bool fixed_len, bool write_info, double logl_epsilon, double gradient_epsilon) {
+    outError("This option does not work with edge-linked partition model yet");
+    return 0.0;
+}
+
 void PartitionModelPlen::writeInfo(ostream &out) {
     PhyloSuperTreePlen *tree = (PhyloSuperTreePlen*)site_rate->getTree();
     int ntrees = tree->size();
diff --git a/phylosupertreeplen.h b/phylosupertreeplen.h
index d54c46f..baa2979 100644
--- a/phylosupertreeplen.h
+++ b/phylosupertreeplen.h
@@ -130,6 +130,15 @@ public:
 	virtual double optimizeParameters(bool fixed_len = false, bool write_info = true,
                                       double logl_epsilon = 0.1, double gradient_epsilon = 0.001);
 
+
+	/**
+	 *  optimize model parameters and tree branch lengths for the +I+G model
+	 *  using restart strategy.
+	 * 	@param fixed_len TRUE to fix branch lengths, default is false
+	 *	@return the best likelihood
+	 */
+	virtual double optimizeParametersGammaInvar(bool fixed_len = false, bool write_info = true, double logl_epsilon = 0.1, double gradient_epsilon = 0.001);
+
 	double optimizeGeneRate(double tol);
 
 //	virtual double targetFunk(double x[]);
diff --git a/phylotesting.cpp b/phylotesting.cpp
index 272b9e9..43e9a50 100644
--- a/phylotesting.cpp
+++ b/phylotesting.cpp
@@ -991,7 +991,7 @@ void testPartitionModel(Params &params, PhyloSuperTree* in_tree, vector<ModelInf
             this_aln = in_tree->at(distID[i] & ((1<<16)-1))->aln;
             dist[i] -= ((double)this_aln->getNSeq())*this_aln->getNPattern()*this_aln->num_states;
         }
-        if (params.num_threads > 1)
+        if (params.num_threads > 1 && num_pairs >= 1)
             quicksort(dist, 0, num_pairs-1, distID);
 
 #ifdef _OPENMP
@@ -1195,15 +1195,18 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
 	in_tree->optimize_by_newton = params.optimize_by_newton;
 	in_tree->setLikelihoodKernel(params.SSE);
 
-    int num_rate_classes = 3 + params.max_rate_cats;
+//    int num_rate_classes = 3 + params.max_rate_cats;
 
-	RateHeterogeneity ** rate_class = new RateHeterogeneity*[num_rate_classes];
+	RateHeterogeneity ** rate_class = new RateHeterogeneity*[4];
 	rate_class[0] = new RateHeterogeneity();
 	rate_class[1] = new RateInvar(-1, NULL);
 	rate_class[2] = new RateGamma(params.num_rate_cats, params.gamma_shape, params.gamma_median, NULL);
 	rate_class[3] = new RateGammaInvar(params.num_rate_cats, params.gamma_shape, params.gamma_median, -1, params.optimize_model_rate_joint, NULL);
-    for (model = 4; model < num_rate_classes; model++)
-        rate_class[model] = new RateFree(model-2, params.gamma_shape, "", false, params.optimize_alg, NULL);
+    
+    RateFree ** rate_class_free = new RateFree*[params.max_rate_cats-1];
+    
+    for (model = 0; model < params.max_rate_cats-1; model++)
+        rate_class_free[model] = new RateFree(model+2, params.gamma_shape, "", false, params.optimize_alg, NULL);
         
 	ModelGTR *subst_model = NULL;
 	if (seq_type == SEQ_BINARY)
@@ -1348,7 +1351,7 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
                 if (ncat <= 1) outError("Number of rate categories for " + model_names[model] + " is <= 1");
                 if (ncat > params.max_rate_cats)
                     outError("Number of rate categories for " + model_names[model] + " exceeds " + convertIntToString(params.max_rate_cats));
-                tree->setRate(rate_class[2+ncat]);
+                tree->setRate(rate_class_free[ncat-2]);
             } else if (model_names[model].find("+I") != string::npos && (pos = model_names[model].find("+G")) != string::npos) {
                 tree->setRate(rate_class[3]);
                 if (model_names[model].length() > pos+2 && isdigit(model_names[model][pos+2])) {
@@ -1428,6 +1431,10 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
                     outError("-mtree option is not supported for partition model");
                 }
                 IQTree *iqtree = new IQTree(in_tree->aln);
+                // set checkpoint
+                iqtree->setCheckpoint(in_tree->getCheckpoint());
+                iqtree->num_precision = in_tree->num_precision;
+                
                 cout << endl << "===> Testing model " << model+1 << ": " << params.model_name << endl;
                 runTreeReconstruction(params, original_model, *iqtree, model_info);
                 info.logl = iqtree->computeLikelihood();
@@ -1436,6 +1443,16 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
                 params.model_name = original_model;
                 params.user_file = orig_user_tree;
                 tree = iqtree;
+
+                // clear all checkpointed information
+                Checkpoint *newCheckpoint = new Checkpoint;
+                tree->getCheckpoint()->getSubCheckpoint(newCheckpoint, "iqtree");
+                tree->getCheckpoint()->clear();
+                tree->getCheckpoint()->insert(newCheckpoint->begin(), newCheckpoint->end());
+                tree->getCheckpoint()->putBool("finished", false);
+                tree->getCheckpoint()->dump(true);
+                delete newCheckpoint;
+
             } else {
                 if (tree->getMemoryRequired() > RAM_requirement) {
                     tree->deleteAllPartialLh();
@@ -1463,7 +1480,8 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
                     {
                         if (verbose_mode >= VB_MED)
                             cout << "reoptimizing from previous parameters of +R...." << endl;
-                        dynamic_cast<RateFree*>(rate_class[2+ncat])->setRateAndProp(dynamic_cast<RateFree*>(rate_class[1+ncat]));
+                        assert(ncat >= 3);
+                        rate_class_free[ncat-2]->setRateAndProp(rate_class_free[ncat-3]);
                         info.logl = tree->getModelFactory()->optimizeParameters(false, false, TOL_LIKELIHOOD_MODELTEST, TOL_GRADIENT_MODELTEST);
                         info.tree_len = tree->treeLength();                        
                     }
@@ -1688,10 +1706,17 @@ string testModel(Params &params, PhyloTree* in_tree, vector<ModelInfo> &model_in
 
 	delete model_fac;
 	delete subst_model;
-	for (int rate_type = num_rate_classes-1; rate_type >= 0; rate_type--) {
+    int rate_type;
+	for (rate_type = 3; rate_type >= 0; rate_type--) {
 		delete rate_class[rate_type];
     }
     delete [] rate_class;
+    
+	for (rate_type = params.max_rate_cats-2; rate_type >= 0; rate_type--) {
+		delete rate_class_free[rate_type];
+    }
+    delete [] rate_class_free;
+    
 //	delete tree_hetero;
 //	delete tree_homo;
 	in_tree->deleteAllPartialLh();
diff --git a/phylotree.cpp b/phylotree.cpp
index bdc9da4..2221943 100644
--- a/phylotree.cpp
+++ b/phylotree.cpp
@@ -360,7 +360,10 @@ void PhyloTree::readTreeString(const string &tree_string) {
 //	str(tree_string);
 //	str.seekg(0, ios::beg);
 	freeNode();
-	readTree(str, rooted);
+    
+    // bug fix 2016-04-14: in case taxon name happens to be ID
+	MTree::readTree(str, rooted);
+    
     assignLeafNames();
 //	setAlignment(aln);
 	setRootNode(params->root);
diff --git a/phylotree.h b/phylotree.h
index 2ed8388..68b33dd 100644
--- a/phylotree.h
+++ b/phylotree.h
@@ -264,22 +264,22 @@ const double TP_MAX_EXP_DIFF = 40.0;
 #define LM_MAX  10
 
 struct QuartetGroups{
-    int numGroups;	// number of clusters:
-			// 0:	not initialized, default -> 1
-			// 1:	no clusters - any (a,b)|(c,d)
-			// 2:	2 clusters  - (a,a')|(b,b')
-			// 3:	3 clusters  - (a,a')|(b,c)	[rare]
-			// 4:	4 clusters  - (a,b)|(c,d)
-    int numSeqs;	// number of seqs in alignment (should be #A+#B+#C+#D+#X)
-    int numQuartSeqs;	// number of seqs in analysis  (should be #A+#B+#C+#D)
-    int numGrpSeqs[5];	// number of seqs in cluster A, B, C, D, and X (exclude)
-    int uniqueQuarts;	// number of existing unique quartets for this grouping
-    string Name[5];	// seqIDs of cluster A
-    vector<int> GroupA;	// seqIDs of cluster A
-    vector<int> GroupB;	// seqIDs of cluster B
-    vector<int> GroupC;	// seqIDs of cluster C
-    vector<int> GroupD;	// seqIDs of cluster D
-    vector<int> GroupX;	// seqIDs of cluster X
+    int numGroups;		// number of clusters:
+				// 0:	not initialized, default -> 1
+				// 1:	no clusters - any (a,b)|(c,d)
+				// 2:	2 clusters  - (a,a')|(b,b')
+				// 3:	3 clusters  - (a,a')|(b,c)	[rare]
+				// 4:	4 clusters  - (a,b)|(c,d)
+    int numSeqs;		// number of seqs in alignment (should be #A+#B+#C+#D+#X)
+    int numQuartSeqs;		// number of seqs in analysis  (should be #A+#B+#C+#D)
+    int numGrpSeqs[5];		// number of seqs in cluster A, B, C, D, and X (exclude)
+    int64_t uniqueQuarts;	// number of existing unique quartets for this grouping
+    string Name[5];		// seqIDs of cluster A
+    vector<int> GroupA;		// seqIDs of cluster A
+    vector<int> GroupB;		// seqIDs of cluster B
+    vector<int> GroupC;		// seqIDs of cluster C
+    vector<int> GroupD;		// seqIDs of cluster D
+    vector<int> GroupX;		// seqIDs of cluster X
 };
 
 struct QuartetInfo {
@@ -292,7 +292,7 @@ struct QuartetInfo {
 };
 
 struct SeqQuartetInfo {
-    unsigned long countarr[LM_MAX]; // the 7 areas of the simplex triangle [0-6; corners (0:top, 1:right, 2:left), rectangles (3:right, 4:left, 5:bottom), 6:center] and the 3 corners [7-9; 7:top, 8:right, 9:left]
+    int64_t countarr[LM_MAX]; // the 7 areas of the simplex triangle [0-6; corners (0:top, 1:right, 2:left), rectangles (3:right, 4:left, 5:bottom), 6:center] and the 3 corners [7-9; 7:top, 8:right, 9:left]
 };
 
 // ********************************************
diff --git a/quartet.cpp b/quartet.cpp
index 2fe668d..eca150d 100644
--- a/quartet.cpp
+++ b/quartet.cpp
@@ -201,7 +201,7 @@ void plotlmpointsvg(FILE *ofp, double w1, double w2)
 
 
 // void finishsvg(FILE *ofp, unsigned long **countarr)
-void finishsvg(FILE *ofp, vector<SeqQuartetInfo> lmap_seq_quartet_info, int leafNum, unsigned long Numquartets)
+void finishsvg(FILE *ofp, vector<SeqQuartetInfo> lmap_seq_quartet_info, int leafNum, int64_t Numquartets)
 {
 	fprintf(ofp,"  </g>\n");
 	/* end triangle 1 (top) */
@@ -550,7 +550,7 @@ void plotlmpointcolor(FILE *epsofp, FILE *svgofp, double w1, double w2, int red,
 
 /* last lines of EPSF likelihood mapping file */
 //void finisheps(FILE *ofp, unsigned long **countarr)
-void finisheps(FILE *ofp, vector<SeqQuartetInfo> lmap_seq_quartet_info, int leafNum, unsigned long Numquartets)
+void finisheps(FILE *ofp, vector<SeqQuartetInfo> lmap_seq_quartet_info, int leafNum, int64_t Numquartets)
 {
 	fprintf(ofp, "stroke\n");
 	fprintf(ofp, "%% second triangle (the one with 3 basins)\n");
@@ -674,8 +674,6 @@ void PhyloTree::computeQuartetLikelihoods(vector<QuartetInfo> &lmap_quartet_info
     if (leafNum < 4) 
         outError("Tree must have 4 or more taxa with unique sequences!");
         
-    lmap_quartet_info.resize(params->lmap_num_quartets);
-    
     int qc[] = {0, 1, 2, 3,  0, 2, 1, 3,  0, 3, 1, 2};
     
     double onethird = 1.0/3.0;
@@ -733,21 +731,97 @@ void PhyloTree::computeQuartetLikelihoods(vector<QuartetInfo> &lmap_quartet_info
 	   size2 = sizeA-3;
 	   size1 = sizeA-2;
 	   size0 = sizeA-1;
-	   LMGroups.uniqueQuarts = 1     + size3 +
-	                           size2 * (size2-1) / 2 +
-	                           size1 * (size1-1) * (size1-2) / 6 +
-	                           size0 * (size0-1) * (size0-2) * (size0-3) / 24;
+	   LMGroups.uniqueQuarts = (int64_t)1 + size3 +
+	                           (int64_t)size2 * (size2-1) / 2 +
+	                           (int64_t)size1 * (size1-1) * (size1-2) / 6 +
+	                           (int64_t)size0 * (size0-1) * (size0-2) * (size0-3) / 24;
 	   break;
 	case 2: 
-	   LMGroups.uniqueQuarts = (sizeA * (sizeA - 1)) / 2 * (sizeB * (sizeB - 1)) / 2; break;
+	   LMGroups.uniqueQuarts = ((int64_t)sizeA * (sizeA - 1)) / 2 * (sizeB * (sizeB - 1)) / 2; break;
 	case 3: 
-	   LMGroups.uniqueQuarts = sizeA * sizeB * (sizeC * (sizeC - 1)) / 2; break;
+	   LMGroups.uniqueQuarts = (int64_t)sizeA * sizeB * (sizeC * (sizeC - 1)) / 2; break;
 	case 4: 
-	   LMGroups.uniqueQuarts = sizeA * sizeB * sizeC * sizeD; break;
+	   LMGroups.uniqueQuarts = (int64_t)sizeA * sizeB * sizeC * sizeD; break;
 	default: 
 	   outError("Unknown Likelihood Mapping mode! PLEASE report this to the developers!"); 
 	   break;
     }
+
+    if (params->lmap_num_quartets == 0)
+        params->lmap_num_quartets = LMGroups.uniqueQuarts;
+    if (params->lmap_num_quartets > LMGroups.uniqueQuarts) {
+        cout << "INFO: Number of quartets is reduced to all unique quartets " << LMGroups.uniqueQuarts << endl; 
+    }
+
+    cout << "Computing " << params->lmap_num_quartets << " quartet likelihoods (one dot represents 100 quartets)." << endl << endl;
+    
+    lmap_quartet_info.resize(params->lmap_num_quartets);
+    
+    bool quartets_drawn = false;
+    
+    if (params->lmap_num_quartets == LMGroups.uniqueQuarts) {
+        // draw all unique quartets now
+        quartets_drawn = true;
+        int64_t qid = 0;
+        switch (numGroups) {
+        case 1:
+            for (int i0 = 0; i0 < sizeA-3; i0++)
+                for (int i1 = i0+1; i1 < sizeA-2; i1++)
+                    for (int i2 = i1+1; i2 < sizeA-1; i2++)
+                        for (int i3 = i2+1; i3 < sizeA; i3++) {
+                            lmap_quartet_info[qid].seqID[0] = LMGroups.GroupA[i0];
+                            lmap_quartet_info[qid].seqID[1] = LMGroups.GroupA[i1];
+                            lmap_quartet_info[qid].seqID[2] = LMGroups.GroupA[i2];
+                            lmap_quartet_info[qid].seqID[3] = LMGroups.GroupA[i3];
+                            qid++;
+                        }
+            break;
+
+        case 2:
+            for (int i0 = 0; i0 < sizeA-1; i0++)
+                for (int i1 = i0+1; i1 < sizeA; i1++)
+                    for (int i2 = 0; i2 < sizeB-1; i2++)
+                        for (int i3 = i2+1; i3 < sizeB; i3++) {
+                            lmap_quartet_info[qid].seqID[0] = LMGroups.GroupA[i0];
+                            lmap_quartet_info[qid].seqID[1] = LMGroups.GroupA[i1];
+                            lmap_quartet_info[qid].seqID[2] = LMGroups.GroupB[i2];
+                            lmap_quartet_info[qid].seqID[3] = LMGroups.GroupB[i3];
+                            qid++;
+                        }
+            break;
+
+        case 3:
+            for (int i0 = 0; i0 < sizeA; i0++)
+                for (int i1 = 0; i1 < sizeB; i1++)
+                    for (int i2 = 0; i2 < sizeC-1; i2++)
+                        for (int i3 = i2+1; i3 < sizeC; i3++) {
+                            lmap_quartet_info[qid].seqID[0] = LMGroups.GroupA[i0];
+                            lmap_quartet_info[qid].seqID[1] = LMGroups.GroupB[i1];
+                            lmap_quartet_info[qid].seqID[2] = LMGroups.GroupC[i2];
+                            lmap_quartet_info[qid].seqID[3] = LMGroups.GroupC[i3];
+                            qid++;
+                        }
+            break;
+        case 4:
+            for (int i0 = 0; i0 < sizeA; i0++)
+                for (int i1 = 0; i1 < sizeB; i1++)
+                    for (int i2 = 0; i2 < sizeC; i2++)
+                        for (int i3 = 0; i3 < sizeD; i3++) {
+                            lmap_quartet_info[qid].seqID[0] = LMGroups.GroupA[i0];
+                            lmap_quartet_info[qid].seqID[1] = LMGroups.GroupB[i1];
+                            lmap_quartet_info[qid].seqID[2] = LMGroups.GroupC[i2];
+                            lmap_quartet_info[qid].seqID[3] = LMGroups.GroupD[i3];
+                            qid++;
+                        }
+            break;
+
+        default:
+            break;
+        }
+        // sanity check
+        assert(qid == LMGroups.uniqueQuarts);
+    }
+    
     // fprintf(stderr,"XXX - #quarts: %d; #groups: %d, A: %d, B:%d, C:%d, D:%d\n", LMGroups.uniqueQuarts, LMGroups.numGroups, sizeA, sizeB, sizeC, sizeD);
     
 
@@ -763,57 +837,58 @@ void PhyloTree::computeQuartetLikelihoods(vector<QuartetInfo> &lmap_quartet_info
 #ifdef _OPENMP
     #pragma omp for schedule(guided)
 #endif
-    for (int qid = 0; qid < params->lmap_num_quartets; qid++) { /*** draw lmap_num_quartets quartets randomly ***/
-	// fprintf(stderr, "%d\n", qid); 
+    for (int64_t qid = 0; qid < params->lmap_num_quartets; qid++) { /*** draw lmap_num_quartets quartets randomly ***/
+	// fprintf(stderr, "%I64d\n", qid); 
 
         // uniformly draw 4 taxa
 	// (a) sample taxon 1
         // was: lmap_quartet_info[qid].seqID[0] = random_int(leafNum);
-	lmap_quartet_info[qid].seqID[0] = LMGroups.GroupA[random_int(sizeA, rstream)];
-
-        do {
-	    // (b) sample taxon 2 according to the number of clusters
-            // was: lmap_quartet_info[qid].seqID[1] = random_int(leafNum);
-	    switch(numGroups){
-		case 1: lmap_quartet_info[qid].seqID[1] = LMGroups.GroupA[random_int(sizeA, rstream)]; break; // a1,A2|a3,a4
-		case 2: lmap_quartet_info[qid].seqID[1] = LMGroups.GroupA[random_int(sizeA, rstream)]; break; // a1,A2|b1,b2
-		case 3: lmap_quartet_info[qid].seqID[1] = LMGroups.GroupB[random_int(sizeB, rstream)]; break; // a ,B |c1,c2
-		case 4: lmap_quartet_info[qid].seqID[1] = LMGroups.GroupB[random_int(sizeB, rstream)]; break; // a ,B |c, d
-		default: outError("Unknown Likelihood Mapping sampling mode! PLEASE report this to the developers!"); break;
-	    }
-        } while (lmap_quartet_info[qid].seqID[1] == lmap_quartet_info[qid].seqID[0]);
-        do {
-	    // (c) sample taxon 3 according to the number of clusters
-            // was: lmap_quartet_info[qid].seqID[2] = random_int(leafNum);
-	    switch(numGroups){
-		case 1: lmap_quartet_info[qid].seqID[2] = LMGroups.GroupA[random_int(sizeA, rstream)]; break; // a1,a2|A3,a4
-		case 2: lmap_quartet_info[qid].seqID[2] = LMGroups.GroupB[random_int(sizeB, rstream)]; break; // a1,a2|B1,b2
-		case 3: lmap_quartet_info[qid].seqID[2] = LMGroups.GroupC[random_int(sizeC, rstream)]; break; // a ,b |C1,c2
-		case 4: lmap_quartet_info[qid].seqID[2] = LMGroups.GroupC[random_int(sizeC, rstream)]; break; // a ,b |C, d
-		default: outError("Unknown Likelihood Mapping sampling mode! PLEASE report this to the developers!"); break;
-	    }
-        } while (lmap_quartet_info[qid].seqID[2] == lmap_quartet_info[qid].seqID[0] || lmap_quartet_info[qid].seqID[2] == lmap_quartet_info[qid].seqID[1]);
-        do {
-	    // (d) sample taxon 4 according to the number of clusters
-            // was: lmap_quartet_info[qid].seqID[3] = random_int(leafNum);
-	    switch(numGroups){
-		case 1: lmap_quartet_info[qid].seqID[3] = LMGroups.GroupA[random_int(sizeA, rstream)]; break; // a1,a2|a3,A4
-		case 2: lmap_quartet_info[qid].seqID[3] = LMGroups.GroupB[random_int(sizeB, rstream)]; break; // a1,a2|b1,B2
-		case 3: lmap_quartet_info[qid].seqID[3] = LMGroups.GroupC[random_int(sizeC, rstream)]; break; // a ,b |c1,C2
-		case 4: lmap_quartet_info[qid].seqID[3] = LMGroups.GroupD[random_int(sizeD, rstream)]; break; // a ,b |c, D
-		default: outError("Unknown Likelihood Mapping sampling mode! PLEASE report this to the developers!"); break;
-	    }
-        } while (lmap_quartet_info[qid].seqID[3] == lmap_quartet_info[qid].seqID[0] || lmap_quartet_info[qid].seqID[3] == lmap_quartet_info[qid].seqID[1]
-            || lmap_quartet_info[qid].seqID[3] == lmap_quartet_info[qid].seqID[2]);
-            
+        if (!quartets_drawn) {
+            // draw a random quartet
+            lmap_quartet_info[qid].seqID[0] = LMGroups.GroupA[random_int(sizeA, rstream)];
+
+            do {
+            // (b) sample taxon 2 according to the number of clusters
+                // was: lmap_quartet_info[qid].seqID[1] = random_int(leafNum);
+            switch(numGroups){
+            case 1: lmap_quartet_info[qid].seqID[1] = LMGroups.GroupA[random_int(sizeA, rstream)]; break; // a1,A2|a3,a4
+            case 2: lmap_quartet_info[qid].seqID[1] = LMGroups.GroupA[random_int(sizeA, rstream)]; break; // a1,A2|b1,b2
+            case 3: lmap_quartet_info[qid].seqID[1] = LMGroups.GroupB[random_int(sizeB, rstream)]; break; // a ,B |c1,c2
+            case 4: lmap_quartet_info[qid].seqID[1] = LMGroups.GroupB[random_int(sizeB, rstream)]; break; // a ,B |c, d
+            default: outError("Unknown Likelihood Mapping sampling mode! PLEASE report this to the developers!"); break;
+            }
+            } while (lmap_quartet_info[qid].seqID[1] == lmap_quartet_info[qid].seqID[0]);
+            do {
+            // (c) sample taxon 3 according to the number of clusters
+                // was: lmap_quartet_info[qid].seqID[2] = random_int(leafNum);
+            switch(numGroups){
+            case 1: lmap_quartet_info[qid].seqID[2] = LMGroups.GroupA[random_int(sizeA, rstream)]; break; // a1,a2|A3,a4
+            case 2: lmap_quartet_info[qid].seqID[2] = LMGroups.GroupB[random_int(sizeB, rstream)]; break; // a1,a2|B1,b2
+            case 3: lmap_quartet_info[qid].seqID[2] = LMGroups.GroupC[random_int(sizeC, rstream)]; break; // a ,b |C1,c2
+            case 4: lmap_quartet_info[qid].seqID[2] = LMGroups.GroupC[random_int(sizeC, rstream)]; break; // a ,b |C, d
+            default: outError("Unknown Likelihood Mapping sampling mode! PLEASE report this to the developers!"); break;
+            }
+            } while (lmap_quartet_info[qid].seqID[2] == lmap_quartet_info[qid].seqID[0] || lmap_quartet_info[qid].seqID[2] == lmap_quartet_info[qid].seqID[1]);
+            do {
+            // (d) sample taxon 4 according to the number of clusters
+                // was: lmap_quartet_info[qid].seqID[3] = random_int(leafNum);
+            switch(numGroups){
+            case 1: lmap_quartet_info[qid].seqID[3] = LMGroups.GroupA[random_int(sizeA, rstream)]; break; // a1,a2|a3,A4
+            case 2: lmap_quartet_info[qid].seqID[3] = LMGroups.GroupB[random_int(sizeB, rstream)]; break; // a1,a2|b1,B2
+            case 3: lmap_quartet_info[qid].seqID[3] = LMGroups.GroupC[random_int(sizeC, rstream)]; break; // a ,b |c1,C2
+            case 4: lmap_quartet_info[qid].seqID[3] = LMGroups.GroupD[random_int(sizeD, rstream)]; break; // a ,b |c, D
+            default: outError("Unknown Likelihood Mapping sampling mode! PLEASE report this to the developers!"); break;
+            }
+            } while (lmap_quartet_info[qid].seqID[3] == lmap_quartet_info[qid].seqID[0] || lmap_quartet_info[qid].seqID[3] == lmap_quartet_info[qid].seqID[1]
+                || lmap_quartet_info[qid].seqID[3] == lmap_quartet_info[qid].seqID[2]);
+        }
 // fprintf(stderr, "qqq%d: %d, %d, %d, %d\n", qid, lmap_quartet_info[qid].seqID[0], lmap_quartet_info[qid].seqID[1], lmap_quartet_info[qid].seqID[2], lmap_quartet_info[qid].seqID[3]); 
 
 	// *** taxa should not be sorted, because that changes the corners a dot is assigned to - removed HAS ;^)
         // obsolete: sort(lmap_quartet_info[qid].seqID, lmap_quartet_info[qid].seqID+4); // why sort them?!? HAS ;^)
-            
+
         // initialize sub-alignment and sub-tree
         Alignment *quartet_aln;
-        PhyloTree *quartet_tree;
         if (aln->isSuperAlignment()) {
             quartet_aln = new SuperAlignment;
         } else {
@@ -821,63 +896,75 @@ void PhyloTree::computeQuartetLikelihoods(vector<QuartetInfo> &lmap_quartet_info
         }
         IntVector seq_id;
         seq_id.insert(seq_id.begin(), lmap_quartet_info[qid].seqID, lmap_quartet_info[qid].seqID+4);
-        quartet_aln->extractSubAlignment(aln, seq_id, 0);
-        if (isSuperTree()) {
-            quartet_tree = new PhyloSuperTree((SuperAlignment*)quartet_aln, (PhyloSuperTree*)this);
+        IntVector kept_partitions;
+        // only keep partitions with at least 3 sequences
+        quartet_aln->extractSubAlignment(aln, seq_id, 0, 3, &kept_partitions);
+                
+        if (kept_partitions.size() == 0) {
+            // nothing kept
+            for (int k = 0; k < 3; k++) {
+                lmap_quartet_info[qid].logl[k] = -1.0;
+            }
         } else {
-            quartet_tree = new PhyloTree(quartet_aln);
-        }
+            // something partition kept, do computations
+            PhyloTree *quartet_tree;
+            if (isSuperTree()) {
+                quartet_tree = new PhyloSuperTree((SuperAlignment*)quartet_aln, (PhyloSuperTree*)this);
+            } else {
+                quartet_tree = new PhyloTree(quartet_aln);
+            }
 
-        // set up parameters
-        quartet_tree->setParams(params);
-        quartet_tree->optimize_by_newton = params->optimize_by_newton;
-        quartet_tree->setLikelihoodKernel(params->SSE);
-
-        // set up partition model
-        if (isSuperTree()) {
-            PhyloSuperTree *quartet_super_tree = (PhyloSuperTree*)quartet_tree;
-            PhyloSuperTree *super_tree = (PhyloSuperTree*)this;
-            for (int i = 0; i < super_tree->size(); i++) {
-                quartet_super_tree->at(i)->setModelFactory(super_tree->at(i)->getModelFactory());
-                quartet_super_tree->at(i)->setModel(super_tree->at(i)->getModel());
-                quartet_super_tree->at(i)->setRate(super_tree->at(i)->getRate());
+            // set up parameters
+            quartet_tree->setParams(params);
+            quartet_tree->optimize_by_newton = params->optimize_by_newton;
+            quartet_tree->setLikelihoodKernel(params->SSE);
+
+            // set up partition model
+            if (isSuperTree()) {
+                PhyloSuperTree *quartet_super_tree = (PhyloSuperTree*)quartet_tree;
+                PhyloSuperTree *super_tree = (PhyloSuperTree*)this;
+                for (int i = 0; i < quartet_super_tree->size(); i++) {
+                    quartet_super_tree->at(i)->setModelFactory(super_tree->at(kept_partitions[i])->getModelFactory());
+                    quartet_super_tree->at(i)->setModel(super_tree->at(kept_partitions[i])->getModel());
+                    quartet_super_tree->at(i)->setRate(super_tree->at(kept_partitions[i])->getRate());
+                }
             }
-        }
-        
-        // set model and rate
-        quartet_tree->setModelFactory(model_factory);
-        quartet_tree->setModel(getModel());
-        quartet_tree->setRate(getRate());
-        // NOTE: we don't need to set phylo_tree in model and rate because parameters are not reoptimized
-        
-        
-        
-        // loop over 3 quartets to compute likelihood
-        for (int k = 0; k < 3; k++) {
-            string quartet_tree_str;
-            quartet_tree_str = "(" + quartet_aln->getSeqName(qc[k*4]) + "," + quartet_aln->getSeqName(qc[k*4+1]) + ",(" + 
-                quartet_aln->getSeqName(qc[k*4+2]) + "," + quartet_aln->getSeqName(qc[k*4+3]) + "));";
-            quartet_tree->readTreeStringSeqName(quartet_tree_str);
-            quartet_tree->initializeAllPartialLh();
-            quartet_tree->wrapperFixNegativeBranch(true);
-            // optimize branch lengths with logl_epsilon=0.1 accuracy
-            lmap_quartet_info[qid].logl[k] = quartet_tree->optimizeAllBranches(10, 0.1);
-        }
-        // reset model & rate so that they are not deleted
-        quartet_tree->setModel(NULL);
-        quartet_tree->setModelFactory(NULL);
-        quartet_tree->setRate(NULL);
-
-        if (isSuperTree()) {
-            PhyloSuperTree *quartet_super_tree = (PhyloSuperTree*)quartet_tree;
-            for (int i = 0; i < quartet_super_tree->size(); i++) {
-                quartet_super_tree->at(i)->setModelFactory(NULL);
-                quartet_super_tree->at(i)->setModel(NULL);
-                quartet_super_tree->at(i)->setRate(NULL);
+            
+            // set model and rate
+            quartet_tree->setModelFactory(model_factory);
+            quartet_tree->setModel(getModel());
+            quartet_tree->setRate(getRate());
+            // NOTE: we don't need to set phylo_tree in model and rate because parameters are not reoptimized
+            
+            
+            
+            // loop over 3 quartets to compute likelihood
+            for (int k = 0; k < 3; k++) {
+                string quartet_tree_str;
+                quartet_tree_str = "(" + quartet_aln->getSeqName(qc[k*4]) + "," + quartet_aln->getSeqName(qc[k*4+1]) + ",(" + 
+                    quartet_aln->getSeqName(qc[k*4+2]) + "," + quartet_aln->getSeqName(qc[k*4+3]) + "));";
+                quartet_tree->readTreeStringSeqName(quartet_tree_str);
+                quartet_tree->initializeAllPartialLh();
+                quartet_tree->wrapperFixNegativeBranch(true);
+                // optimize branch lengths with logl_epsilon=0.1 accuracy
+                lmap_quartet_info[qid].logl[k] = quartet_tree->optimizeAllBranches(10, 0.1);
             }
+            // reset model & rate so that they are not deleted
+            quartet_tree->setModel(NULL);
+            quartet_tree->setModelFactory(NULL);
+            quartet_tree->setRate(NULL);
+
+            if (isSuperTree()) {
+                PhyloSuperTree *quartet_super_tree = (PhyloSuperTree*)quartet_tree;
+                for (int i = 0; i < quartet_super_tree->size(); i++) {
+                    quartet_super_tree->at(i)->setModelFactory(NULL);
+                    quartet_super_tree->at(i)->setModel(NULL);
+                    quartet_super_tree->at(i)->setRate(NULL);
+                }
+            }
+            delete quartet_tree;
         }
-
-        delete quartet_tree;
+        
         delete quartet_aln;
 
         // determine likelihood order
@@ -1036,7 +1123,22 @@ void PhyloTree::computeQuartetLikelihoods(vector<QuartetInfo> &lmap_quartet_info
 		lmap_quartet_info[qid].area=6; // LM_REG7 - center 
 	}
 
+	{
+		int64_t count = (qid+1);
+		if ((count % 100) == 0) {
+			cout << '.';
+			if ((count % 1000) == 0) { // separator after 10 dots
+				cout << ' ';
+				if ((count % 5000) == 0) // new-line after 50 dots
+					cout << " : " << count << endl;
+			}
+			cout.flush();
+		}
+	}
     } /*** end draw lmap_num_quartets quartets randomly ***/
+    if ((params->lmap_num_quartets % 5000) != 0) {
+	cout << ". : " << params->lmap_num_quartets << flush << endl << endl;
+    } else cout << endl;
 
 #ifdef _OPENMP
     finish_random(rstream);
@@ -1048,6 +1150,79 @@ void PhyloTree::computeQuartetLikelihoods(vector<QuartetInfo> &lmap_quartet_info
 
 //**************************************
 
+/**
+    read groups in following format "(A, B, C, D), (E, F), (G, H), (I);"
+**/
+void readGroupNewick(char *filename, MSetsBlock *sets_block) {
+    try {
+        ifstream in;
+        in.exceptions(ios::failbit | ios::badbit);
+        in.open(filename);
+        in.exceptions(ios::badbit);
+        char ch;
+        string name;
+        TaxaSetNameVector *sets = sets_block->getSets();
+        while (!in.eof()) {
+            // read the cluster
+            TaxaSetName *myset = new TaxaSetName;
+			sets->push_back(myset);            
+            in >> ch;
+            if (ch != '(')
+                throw "Cluster does not start with '('";
+            // read taxon name
+            do {
+                in >> ch;
+                name = "";
+                do {
+                    name += ch;
+                    ch = in.get();
+                    if (controlchar(ch)) {
+                        in >> ch;
+                        break;
+                    }
+                } while (ch != ',' && ch != ')');
+                myset->taxlist.push_back(name);
+                if (ch == ',') continue; // continue to read next taxon name
+                if (ch == ')') break;
+                throw "No ',' or ')' found after " + name;
+            } while (ch != ')');
+            in >> ch;
+            if (isalnum(ch)) {
+                // read cluster name
+                name = "";
+                do {
+                    name += ch;
+                    ch = in.get();
+                    if (controlchar(ch)) {
+                        in >> ch;
+                        break;
+                    }
+                } while (ch != ',' && ch != ';');
+                myset->name = name;
+            } else {
+                myset->name = "Cluster" + convertIntToString(sets->size());
+            }
+            // check for duplicated name
+            for (TaxaSetNameVector::iterator it = sets->begin(); it != sets->end()-1; it++)
+                if ((*it)->name == myset->name)
+                    throw "Duplicated cluster name " + myset->name;
+            if (ch == ',') continue; // continue to read next cluster
+            if (ch == ';') break;
+            throw "No ',' or ';' found after " + name + ")";
+        }
+        
+        in.clear();
+        // set the failbit again
+        in.exceptions(ios::failbit | ios::badbit);
+        in.close();
+    } catch (ios::failure) {
+        outError(ERR_READ_INPUT);
+    } catch (const char* str) {
+        outError(str);
+    } catch (string &str) {
+        outError(str);
+    }
+}
 
 void PhyloTree::readLikelihoodMappingGroups(char *filename, QuartetGroups &LMGroups) {
 
@@ -1056,17 +1231,26 @@ void PhyloTree::readLikelihoodMappingGroups(char *filename, QuartetGroups &LMGro
     MSetsBlock *lmclusters;
     lmclusters = new MSetsBlock();
     cout << endl << "Reading likelihood mapping cluster file " << filename << "..." << endl;
-    cout << "(The leading numbers represent the order from the master alignment.)" << endl << endl;
 
-    MyReader nexus(filename);
-
-    nexus.Add(lmclusters);
-
-    MyToken token(nexus.inf);
-    nexus.Execute(token);
+    InputType intype = detectInputFile(filename);
+    
+    if (intype == IN_NEXUS) {
+        MyReader nexus(filename);
+        nexus.Add(lmclusters);
+        MyToken token(nexus.inf);
+        nexus.Execute(token);
+    } else if (intype == IN_NEWICK) {
+        readGroupNewick(filename, lmclusters);
+    } else
+        outError("Unknown cluster file format, please use NEXUS or RAxML-style format");
+
+    if (lmclusters->getNSets() == 0)
+        outError("No cluster found");
 
     // lmclusters->Report(cout);
 
+    cout << "(The leading numbers represent the order from the master alignment.)" << endl << endl;
+
     TaxaSetNameVector *allsets = lmclusters->getSets();
     numsets = allsets->size();
 
@@ -1148,6 +1332,7 @@ void PhyloTree::readLikelihoodMappingGroups(char *filename, QuartetGroups &LMGro
     }
     LMGroups.numGroups = n;
     
+    delete lmclusters;
 
 } // end PhyloTree::readLikelihoodMappingGroups
 
@@ -1159,16 +1344,27 @@ void PhyloTree::doLikelihoodMapping() {
     // vector<SeqQuartetInfo> lmap_seq_quartet_info;
     // int areacount[8] = {0, 0, 0, 0, 0, 0, 0, 0};
     // int cornercount[4] = {0, 0, 0, 0};
-    int resolved, partly, unresolved;
-    int qid;
+    int64_t resolved, partly, unresolved;
+    int64_t qid;
     ofstream out;
     string filename;
-
+    
     if(params->lmap_cluster_file != NULL) {
 	// cout << "YYY: test reading" << params->lmap_cluster_file << endl;
-	readLikelihoodMappingGroups(params->lmap_cluster_file, LMGroups);
+        readLikelihoodMappingGroups(params->lmap_cluster_file, LMGroups);
     } else {
-	LMGroups.numGroups = 0; /* no clusterfile -> un-initialized */
+        LMGroups.numGroups = 0; /* no clusterfile -> un-initialized */
+        int64_t recommended_quartets;
+        if (aln->getNSeq() > 10) {
+            recommended_quartets = (int64_t)25*aln->getNSeq();
+        } else {
+            int64_t n = aln->getNSeq();
+            recommended_quartets = n*(n-1)*(n-2)*(n-3)/24;
+        }
+        if (params->lmap_num_quartets > 0 && params->lmap_num_quartets < recommended_quartets) {
+            outWarning("Number of quartets is recommended to be at least " + convertInt64ToString(recommended_quartets) + " s.t. each sequence is sampled sufficiently");
+        }
+
     }
 
     areacount[0] = 0;
@@ -1198,8 +1394,6 @@ void PhyloTree::doLikelihoodMapping() {
         lmap_seq_quartet_info[qid].countarr[9] = 0;
     }
 
-    cout << "Computing quartet likelihoods..." << endl << endl;
-
     computeQuartetLikelihoods(lmap_quartet_info, LMGroups);
 
     for (qid = 0; qid < params->lmap_num_quartets; qid++) {
@@ -1254,11 +1448,12 @@ void PhyloTree::doLikelihoodMapping() {
 
     if (params->print_lmap_quartet_lh) {
         // print quartet file
+        out << "SeqIDs\tlh1\tlh2\tlh3\tweight1\tweight2\tweight3" << endl;
         for (qid = 0; qid < params->lmap_num_quartets; qid++) {
-            out << "(" << lmap_quartet_info[qid].seqID[0] << ","
-                << lmap_quartet_info[qid].seqID[1] << ","
-                << lmap_quartet_info[qid].seqID[2] << ","
-                << lmap_quartet_info[qid].seqID[3] << ")"
+            out << "(" << lmap_quartet_info[qid].seqID[0]+1 << ","
+                << lmap_quartet_info[qid].seqID[1]+1 << ","
+                << lmap_quartet_info[qid].seqID[2]+1 << ","
+                << lmap_quartet_info[qid].seqID[3]+1 << ")"
                 << "\t" << lmap_quartet_info[qid].logl[0] 
                 << "\t" << lmap_quartet_info[qid].logl[1] 
                 << "\t" << lmap_quartet_info[qid].logl[2]
@@ -1267,128 +1462,15 @@ void PhyloTree::doLikelihoodMapping() {
                 << "\t" << lmap_quartet_info[qid].qweight[2] << endl;
         }
 
-        PhyloTree::reportLikelihoodMapping(out);
-
-    /**** begin of report output ****/
-    /**** moved to PhyloTree::reportLikelihoodMapping ****/
-#if 0
-	// LM_REG1 0   /* top corner */
-	// LM_REG2 1   /* bottom-right corner */
-	// LM_REG3 2   /* bottom-left corner */
-	// LM_REG4 3   /* right rectangle */
-	// LM_REG5 4   /* bottom rectangle */
-	// LM_REG6 5   /* left rectangle */
-	// LM_REG7 6   /* center */
-	// LM_AR1  7   /* top third */
-	// LM_AR2  8   /* bottom-right third */
-	// LM_AR3  9   /* bottom-left third */
-
-	out << "LIKELIHOOD MAPPING ANALYSIS" << endl << endl;
-	out << "Number of quartets: " << params->lmap_num_quartets << "(random choice)" << endl << endl;
-	out << "Quartet trees are based on the selected model of substitution." << endl << endl;
-	out << "Sequences are not grouped in clusters." << endl;
-
-        out << endl << endl;
-	out << "LIKELIHOOD MAPPING STATISTICS" << endl << endl;
-
-	out << "           (a,b)-(c,d)                              (a,b)-(c,d)      " << endl;
-	out << "                /\\                                      /\\           " << endl;
-	out << "               /  \\                                    /  \\          " << endl;
-	out << "              /    \\                                  /  1 \\         " << endl;
-	out << "             /  a1  \\                                / \\  / \\        " << endl;
-	out << "            /\\      /\\                              /   \\/   \\       " << endl;
-	out << "           /  \\    /  \\                            /    /\\    \\      " << endl;
-	out << "          /    \\  /    \\                          / 6  /  \\  4 \\     " << endl;
-	out << "         /      \\/      \\                        /\\   /  7 \\   /\\    " << endl;
-	out << "        /        |       \\                      /  \\ /______\\ /  \\   " << endl;
-	out << "       /   a3    |    a2  \\                    / 3  |    5   |  2 \\  " << endl;
-	out << "      /__________|_________\\                  /_____|________|_____\\ " << endl;
-	out << "(a,d)-(b,c)            (a,c)-(b,d)      (a,b)-(c,d)            (a,c)-(b,d) " << endl << endl;
-	out << "For more information about likelihood-mapping refer to" << endl;
-   	out << "   Strimmer and von Haeseler (1997) PNAS 94:6815-6819" << endl;
-	out << "   http://www.ncbi.nlm.nih.gov/pubmed/9192648" << endl;
-	out << "and/or" << endl;
-   	out << "   Schmidt and von Haeseler (2003) Current Protocols in Bioinformatics" << endl;
-   	out << "   (by Baxevanis et al., Eds.), Unit 6, Wiley&Sons, New York." << endl;
-	out << "   http://dx.doi.org/10.1002/0471250953.bi0606s17" << endl;
-
-
-        out << endl << endl;
-        out << "Quartet support of regions a1, a2, a3:" << endl << endl;
-        out << "     #quartets    a1 (% a1)        a2 (% a2)        a3 (% a3)    name" << endl;
-        for (qid = 0; qid < leafNum; qid++) {
-	    //unsigned long sumq = lmap_seq_quartet_info[qid].countarr[0] + lmap_seq_quartet_info[qid].countarr[1] + lmap_seq_quartet_info[qid].countarr[2] + lmap_seq_quartet_info[qid].countarr[3] + lmap_seq_quartet_info[qid].countarr[4] + lmap_seq_quartet_info[qid].countarr[5] + lmap_seq_quartet_info[qid].countarr[6];
-	    unsigned long sumq = lmap_seq_quartet_info[qid].countarr[7] + lmap_seq_quartet_info[qid].countarr[8] + lmap_seq_quartet_info[qid].countarr[9];
-
-            out.setf(ios::fixed, ios::floatfield); // set fixed floating format
-            out.precision(2);
-            out << setw(4) << qid+1 
-                << setw(9) << sumq
-        	<< setw(7) << lmap_seq_quartet_info[qid].countarr[7]
-        	<< " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[7]/sumq << ") "
-        	<< setw(7) << lmap_seq_quartet_info[qid].countarr[8]
-        	<< " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[8]/sumq << ") "
-        	<< setw(7) << lmap_seq_quartet_info[qid].countarr[9]
-        	<< " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[9]/sumq << ")  " 
-        	<< PhyloTree::aln->getSeqName(qid) << endl;
-        }
-
-        out << endl << endl << "Quartet support of areas 1-7:" << endl << endl;
-        out << "                   resolved                                           partly                                             unresolved  name" << endl;
-        out << "     #quartets     1 (% 1)          2 (% 2)          3 (% 3)          4 (% 4)          5 (% 5)          6 (% 6)          7 (% 7)" << endl;
-        for (qid = 0; qid < leafNum; qid++) {
-	    //unsigned long sumq = lmap_seq_quartet_info[qid].countarr[0] + lmap_seq_quartet_info[qid].countarr[1] + lmap_seq_quartet_info[qid].countarr[2] + lmap_seq_quartet_info[qid].countarr[3] + lmap_seq_quartet_info[qid].countarr[4] + lmap_seq_quartet_info[qid].countarr[5] + lmap_seq_quartet_info[qid].countarr[6];
-	    unsigned long sumq = lmap_seq_quartet_info[qid].countarr[7] + lmap_seq_quartet_info[qid].countarr[8] + lmap_seq_quartet_info[qid].countarr[9];
-
-            out.setf(ios::fixed, ios::floatfield); // set fixed floating format
-            out.precision(2);
-            out << setw(4) << qid+1 
-                << setw(9) << sumq
-                << setw(7) << lmap_seq_quartet_info[qid].countarr[0] 
-		<< " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[0]/sumq << ") "
-        	<< setw(7) << lmap_seq_quartet_info[qid].countarr[1] 
-                << " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[1]/sumq << ") "
-        	<< setw(7) << lmap_seq_quartet_info[qid].countarr[2]
-                << " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[2]/sumq << ") "
-        	<< setw(7) << lmap_seq_quartet_info[qid].countarr[3]
-        	<< " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[3]/sumq << ") "
-        	<< setw(7) << lmap_seq_quartet_info[qid].countarr[4]
-        	<< " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[4]/sumq << ") "
-        	<< setw(7) << lmap_seq_quartet_info[qid].countarr[5]
-        	<< " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[5]/sumq << ") "
-        	<< setw(7) << lmap_seq_quartet_info[qid].countarr[6]
-        	<< " (" << setw(6) << (double) 100.0*lmap_seq_quartet_info[qid].countarr[6]/sumq << ")  "
-        	<< PhyloTree::aln->getSeqName(qid) << endl;
-        }
+//        PhyloTree::reportLikelihoodMapping(out);
 
-        out << endl << endl << "Quartet resolution per sequence:" << endl << endl;
-        out << "      #quartets   resolved           partly          unresolved  name" << endl;
-        for (qid = 0; qid < leafNum; qid++) {
-	    //unsigned long sumq = lmap_seq_quartet_info[qid].countarr[0] + lmap_seq_quartet_info[qid].countarr[1] + lmap_seq_quartet_info[qid].countarr[2] + lmap_seq_quartet_info[qid].countarr[3] + lmap_seq_quartet_info[qid].countarr[4] + lmap_seq_quartet_info[qid].countarr[5] + lmap_seq_quartet_info[qid].countarr[6];
-	    unsigned long resolved = lmap_seq_quartet_info[qid].countarr[LM_REG1] + lmap_seq_quartet_info[qid].countarr[LM_REG2] + lmap_seq_quartet_info[qid].countarr[LM_REG3];
-	    unsigned long partly   = lmap_seq_quartet_info[qid].countarr[LM_REG4] + lmap_seq_quartet_info[qid].countarr[LM_REG5] + lmap_seq_quartet_info[qid].countarr[LM_REG6];
-	    unsigned long unres    = lmap_seq_quartet_info[qid].countarr[LM_REG7];
-	    unsigned long sumq = lmap_seq_quartet_info[qid].countarr[7] + lmap_seq_quartet_info[qid].countarr[8] + lmap_seq_quartet_info[qid].countarr[9];
-
-            out.setf(ios::fixed, ios::floatfield); // set fixed floating format
-            out.precision(2);
-            out << setw(4) << qid+1 
-                << setw(9) << sumq
-                << setw(7) << resolved
-		<< " (" << setw(6) << (double) 100.0*resolved/sumq << ") "
-        	<< setw(7) << partly
-                << " (" << setw(6) << (double) 100.0*partly/sumq << ") "
-        	<< setw(7) << unres
-                << " (" << setw(6) << (double) 100.0*unres/sumq << ")  "
-        	<< PhyloTree::aln->getSeqName(qid) << endl;
-        }
-#endif
     }
 
     resolved   = areacount[0] + areacount[1] + areacount[2];
     partly     = areacount[3] + areacount[4] + areacount[5];
     unresolved = areacount[6];
 	
+#if 0  // for debugging purposes only (HAS)
     out << endl << "LIKELIHOOD MAPPING SUMMARY" << endl << endl;
     out << "Number of quartets: " << (resolved+partly+unresolved)
         << " (randomly drawn with replacement)" << endl << endl;
@@ -1400,13 +1482,8 @@ void PhyloTree::doLikelihoodMapping() {
     out << "Number of unresolved      quartets: " << unresolved 
         << " (" << 100.0 * unresolved/(resolved+partly+unresolved) << "%)" << endl << endl;
 
-#if 0
 #endif
 
-    /**** end of report output ****/
-    /**** moved to PhyloTree::reportLikelihoodMapping ****/
-
-
     if (params->print_lmap_quartet_lh) {
         // print quartet file
         out.close();        
@@ -1421,25 +1498,14 @@ void PhyloTree::doLikelihoodMapping() {
     fclose(epsout);        
     cout << "likelihood mapping plot (EPS) written to " << lmap_epsfilename << endl;
 
-
-//    cout << "\nOverall quartet resolution: (from " << (resolved+partly+unresolved) << " randomly drawn quartets)" << endl;
-//    cout << "Fully resolved quartets:  " << resolved   << " (= "
-//        << (double) resolved * 100.0   / (resolved+partly+unresolved) << "%)" << endl;
-//    cout << "Partly resolved quartets: " << partly     << " (= "
-//        << (double) partly * 100.0     / (resolved+partly+unresolved) << "%)" << endl;
-//    cout << "Unresolved quartets:      " << unresolved << " (= "
-//        << (double) unresolved * 100.0 / (resolved+partly+unresolved) << "%)" << endl << endl;
-
 } // end PhyloTree::doLikelihoodMapping
 
 
 
 
 void PhyloTree::reportLikelihoodMapping(ofstream &out) {
-    // int areacount[8] = {0, 0, 0, 0, 0, 0, 0, 0};
-    // int cornercount[4] = {0, 0, 0, 0};
-    int resolved, partly, unresolved;
-    int qid;
+    int64_t resolved, partly, unresolved;
+    int64_t qid;
     int leafNum;
     leafNum = PhyloTree::aln->getNSeq();
     // vector<QuartetInfo> lmap_quartet_info;
@@ -1456,23 +1522,16 @@ void PhyloTree::reportLikelihoodMapping(ofstream &out) {
 	// LM_AR1  7   /* top third */
 	// LM_AR2  8   /* bottom-right third */
 	// LM_AR3  9   /* bottom-left third */
-#if 0
-#define LM_REG1 0   /* top corner */
-#define LM_REG2 1   /* bottom-right corner */
-#define LM_REG3 2   /* bottom-left corner */
-#define LM_REG4 3   /* right rectangle */
-#define LM_REG5 4   /* bottom rectangle */
-#define LM_REG6 5   /* left rectangle */
-#define LM_REG7 6   /* center */
-#define LM_AR1  7   /* top third */
-#define LM_AR2  8   /* bottom-right third */
-#define LM_AR3  9   /* bottom-left third */
-#endif
 
 	out << "LIKELIHOOD MAPPING ANALYSIS" << endl;
 	out << "---------------------------" << endl << endl;
-	out << "Number of quartets: " << params->lmap_num_quartets << " (randomly chosen from " 
+	out << "Number of quartets: " << params->lmap_num_quartets;
+    if (params->lmap_num_quartets < LMGroups.uniqueQuarts)
+        cout << " (randomly chosen with replacement from " 
 		<< LMGroups.uniqueQuarts << " existing unique quartets)" << endl << endl;
+    else
+        out << " (all unique quartets)" << endl << endl;
+        
 	out << "Quartet trees are based on the selected model of substitution." << endl << endl;
 
 	if(LMGroups.numGroups == 1) {
@@ -1613,8 +1672,7 @@ void PhyloTree::reportLikelihoodMapping(ofstream &out) {
         out << "     #quartets    a1 (% a1)        a2 (% a2)        a3 (% a3)    name" << endl;
 	out << "-----------------------------------------------------------------------------" << endl;
         for (qid = 0; qid <= leafNum; qid++) {
-	    //unsigned long sumq = lmap_seq_quartet_info[qid].countarr[0] + lmap_seq_quartet_info[qid].countarr[1] + lmap_seq_quartet_info[qid].countarr[2] + lmap_seq_quartet_info[qid].countarr[3] + lmap_seq_quartet_info[qid].countarr[4] + lmap_seq_quartet_info[qid].countarr[5] + lmap_seq_quartet_info[qid].countarr[6];
-	    unsigned long sumq0, sumq = lmap_seq_quartet_info[qid].countarr[7] + lmap_seq_quartet_info[qid].countarr[8] + lmap_seq_quartet_info[qid].countarr[9];
+	    int64_t sumq0, sumq = lmap_seq_quartet_info[qid].countarr[7] + lmap_seq_quartet_info[qid].countarr[8] + lmap_seq_quartet_info[qid].countarr[9];
 	    if (sumq>0) sumq0=sumq;
 	    else sumq0=1;
 
@@ -1650,8 +1708,7 @@ void PhyloTree::reportLikelihoodMapping(ofstream &out) {
         out << "     #quartets     1 (% 1)          2 (% 2)          3 (% 3)          4 (% 4)          5 (% 5)          6 (% 6)          7 (% 7)" << endl;
 	out << "------------------------------------------------------------------------------------------------------------------------------------------------" << endl;
         for (qid = 0; qid <= leafNum; qid++) {
-	    //unsigned long sumq = lmap_seq_quartet_info[qid].countarr[0] + lmap_seq_quartet_info[qid].countarr[1] + lmap_seq_quartet_info[qid].countarr[2] + lmap_seq_quartet_info[qid].countarr[3] + lmap_seq_quartet_info[qid].countarr[4] + lmap_seq_quartet_info[qid].countarr[5] + lmap_seq_quartet_info[qid].countarr[6];
-	    unsigned long sumq0, sumq = lmap_seq_quartet_info[qid].countarr[7] + lmap_seq_quartet_info[qid].countarr[8] + lmap_seq_quartet_info[qid].countarr[9];
+	    int64_t sumq0, sumq = lmap_seq_quartet_info[qid].countarr[7] + lmap_seq_quartet_info[qid].countarr[8] + lmap_seq_quartet_info[qid].countarr[9];
 	    if (sumq>0) sumq0=sumq;
 	    else sumq0=1;
 
@@ -1703,11 +1760,10 @@ void PhyloTree::reportLikelihoodMapping(ofstream &out) {
         out << "      #quartets   resolved           partly          unresolved  name" << endl;
 	out << "-----------------------------------------------------------------------------" << endl;
         for (qid = 0; qid <= leafNum; qid++) {
-	    //unsigned long sumq = lmap_seq_quartet_info[qid].countarr[0] + lmap_seq_quartet_info[qid].countarr[1] + lmap_seq_quartet_info[qid].countarr[2] + lmap_seq_quartet_info[qid].countarr[3] + lmap_seq_quartet_info[qid].countarr[4] + lmap_seq_quartet_info[qid].countarr[5] + lmap_seq_quartet_info[qid].countarr[6];
-	    unsigned long resolved = lmap_seq_quartet_info[qid].countarr[LM_REG1] + lmap_seq_quartet_info[qid].countarr[LM_REG2] + lmap_seq_quartet_info[qid].countarr[LM_REG3];
-	    unsigned long partly   = lmap_seq_quartet_info[qid].countarr[LM_REG4] + lmap_seq_quartet_info[qid].countarr[LM_REG5] + lmap_seq_quartet_info[qid].countarr[LM_REG6];
-	    unsigned long unres    = lmap_seq_quartet_info[qid].countarr[LM_REG7];
-	    unsigned long sumq0, sumq = lmap_seq_quartet_info[qid].countarr[7] + lmap_seq_quartet_info[qid].countarr[8] + lmap_seq_quartet_info[qid].countarr[9];
+	    int64_t resolved = lmap_seq_quartet_info[qid].countarr[LM_REG1] + lmap_seq_quartet_info[qid].countarr[LM_REG2] + lmap_seq_quartet_info[qid].countarr[LM_REG3];
+	    int64_t partly   = lmap_seq_quartet_info[qid].countarr[LM_REG4] + lmap_seq_quartet_info[qid].countarr[LM_REG5] + lmap_seq_quartet_info[qid].countarr[LM_REG6];
+	    int64_t unres    = lmap_seq_quartet_info[qid].countarr[LM_REG7];
+	    int64_t sumq0, sumq = lmap_seq_quartet_info[qid].countarr[7] + lmap_seq_quartet_info[qid].countarr[8] + lmap_seq_quartet_info[qid].countarr[9];
 	    if (sumq>0) sumq0=sumq;
 	    else sumq0=1;
 
@@ -1754,3 +1810,4 @@ void PhyloTree::reportLikelihoodMapping(ofstream &out) {
         << " (=" << 100.0 * unresolved/(resolved+partly+unresolved) << "%)" << endl << endl;
 
 } // end PhyloTree::reportLikelihoodMapping
+
diff --git a/superalignment.cpp b/superalignment.cpp
index 6d71ca0..7311455 100644
--- a/superalignment.cpp
+++ b/superalignment.cpp
@@ -101,10 +101,11 @@ void SuperAlignment::linkSubAlignment(int part) {
 	}
 }
 
-void SuperAlignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_true_char) {
+void SuperAlignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_true_char, int min_taxa, IntVector *kept_partitions) {
 	assert(aln->isSuperAlignment());
 	SuperAlignment *saln = (SuperAlignment*)aln;
 
+    int i;
     IntVector::iterator it;
     for (it = seq_id.begin(); it != seq_id.end(); it++) {
         assert(*it >= 0 && *it < aln->getNSeq());
@@ -115,24 +116,33 @@ void SuperAlignment::extractSubAlignment(Alignment *aln, IntVector &seq_id, int
 	//Alignment::extractSubAlignment(aln, seq_id, 0);
 
 	taxa_index.resize(getNSeq());
-	for (int i = 0; i < getNSeq(); i++)
+	for (i = 0; i < getNSeq(); i++)
 		taxa_index[i].resize(saln->partitions.size(), -1);
 
 	int part = 0;
-	partitions.resize(saln->partitions.size());
+//	partitions.resize(saln->partitions.size());
+    partitions.resize(0);
 	for (vector<Alignment*>::iterator ait = saln->partitions.begin(); ait != saln->partitions.end(); ait++, part++) {
 		IntVector sub_seq_id;
 		for (IntVector::iterator it = seq_id.begin(); it != seq_id.end(); it++)
 			if (saln->taxa_index[*it][part] >= 0)
 				sub_seq_id.push_back(saln->taxa_index[*it][part]);
+        if (sub_seq_id.size() < min_taxa)
+            continue;
 		Alignment *subaln = new Alignment;
 		subaln->extractSubAlignment(*ait, sub_seq_id, 0);
-		partitions[part] = subaln;
-		linkSubAlignment(part);
+		partitions.push_back(subaln);
+		linkSubAlignment(partitions.size()-1);
+        if (kept_partitions) kept_partitions->push_back(part);
 //		cout << subaln->getNSeq() << endl;
 //		subaln->printPhylip(cout);
 	}
 
+    if (partitions.size() < saln->partitions.size()) {
+        for (i = 0; i < getNSeq(); i++)
+            taxa_index[i].resize(partitions.size());
+    }
+
 	// now build the patterns based on taxa_index
 	buildPattern();
 }
diff --git a/superalignment.h b/superalignment.h
index c3552ad..583dd0a 100644
--- a/superalignment.h
+++ b/superalignment.h
@@ -108,8 +108,10 @@ public:
             @param aln original input alignment
             @param seq_id ID of sequences to extract from
             @param min_true_cher the minimum number of non-gap characters, true_char<min_true_char -> delete the sequence
+            @param min_taxa only keep alignment that has >= min_taxa sequences
+            @param[out] kept_partitions (for SuperAlignment) indices of kept partitions
      */
-    virtual void extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_true_char);
+    virtual void extractSubAlignment(Alignment *aln, IntVector &seq_id, int min_true_char, int min_taxa = 0, IntVector *kept_partitions = NULL);
 
     /**
      * remove identical sequences from alignment
diff --git a/tools.cpp b/tools.cpp
index fc01036..85ca06c 100644
--- a/tools.cpp
+++ b/tools.cpp
@@ -263,6 +263,35 @@ void convert_int_vec(const char *str, IntVector &vec) throw (string) {
 }
 
 
+int64_t convert_int64(const char *str) throw (string) {
+    char *endptr;
+    int64_t i = (int64_t)strtoll(str, &endptr, 10); // casted because 'long long' may be larger than int64_t
+
+    if ((i == 0 && endptr == str) || abs(i) == HUGE_VALL || *endptr != 0) {
+        string err = "Expecting large integer , but found \"";
+        err += str;
+        err += "\" instead";
+        throw err;
+    }
+
+    return i;
+}
+
+int64_t convert_int64(const char *str, int &end_pos) throw (string) {
+	char *endptr;
+	int64_t i = (int64_t)strtoll(str, &endptr, 10); // casted because 'long long' may be larger than int64_t
+
+	if ((i == 0 && endptr == str) || abs(i) == HUGE_VALL) {
+		string err = "Expecting large integer, but found \"";
+		err += str;
+		err += "\" instead";
+		throw err;
+	}
+	end_pos = endptr - str;
+	return i;
+}
+
+
 double convert_double(const char *str) throw (string) {
     char *endptr;
     double d = strtod(str, &endptr);
@@ -613,6 +642,7 @@ void parseArg(int argc, char *argv[], Params &params) {
     params.user_file = NULL;
     params.fai = false;
     params.testAlpha = false;
+    params.test_param = false;
     params.testAlphaEpsAdaptive = false;
     params.randomAlpha = false;
     params.testAlphaEps = 0.1;
@@ -882,7 +912,7 @@ void parseArg(int argc, char *argv[], Params &params) {
     params.freq_const_patterns = NULL;
     params.no_rescale_gamma_invar = false;
     params.compute_seq_identity_along_tree = false;
-    params.lmap_num_quartets = 0;
+    params.lmap_num_quartets = -1;
     params.lmap_cluster_file = NULL;
     params.print_lmap_quartet_lh = false;
     params.link_alpha = false;
@@ -2520,6 +2550,12 @@ void parseArg(int argc, char *argv[], Params &params) {
 				params.testAlpha = true;
 				continue;
 			}
+
+            if (strcmp(argv[cnt], "-test_param") == 0 || strcmp(argv[cnt], "--opt-gamma-inv") == 0) {
+                params.test_param = true;
+                continue;
+            }
+
             if (strcmp(argv[cnt], "--adaptive-eps") == 0) {
                 params.testAlphaEpsAdaptive = true;
                 continue;
@@ -2830,9 +2866,13 @@ void parseArg(int argc, char *argv[], Params &params) {
 				cnt++;
 				if (cnt >= argc)
 					throw "Use -lmap <likelihood_mapping_num_quartets>";
-				params.lmap_num_quartets = convert_int(argv[cnt]);
-				if (params.lmap_num_quartets < 1)
-					throw "Number of quartets must be >= 1";
+                if (strcmp(argv[cnt],"ALL") == 0) {
+                    params.lmap_num_quartets = 0;
+                } else {
+                    params.lmap_num_quartets = convert_int64(argv[cnt]);
+                    if (params.lmap_num_quartets < 0)
+                        throw "Number of quartets must be >= 1";
+                }
 				continue;
 			}
 
@@ -2844,6 +2884,8 @@ void parseArg(int argc, char *argv[], Params &params) {
 				// '-keep_ident' is currently required to allow a 1-to-1 mapping of the 
 				// user-given groups (HAS) - possibly obsolete in the future versions
 				params.ignore_identical_seqs = false;
+                if (params.lmap_num_quartets < 0)
+                    params.lmap_num_quartets = 0;
 				continue;
 			}
 
@@ -3121,7 +3163,7 @@ void usage_iqtree(char* argv[], bool full_command) {
             << "                       number of categories (default: n=4)" << endl
             << "  -a <Gamma_shape>     Gamma shape parameter for site rates (default: estimate)" << endl
             << "  -gmedian             Median approximation for +G site rates (default: mean)" << endl
-            << "  --test-alpha         More thorough estimation for +I+G model parameters" << endl
+            << "  --opt-gamma-inv      More thorough estimation for +I+G model parameters" << endl
             << "  -i <p_invar>         Proportion of invariable sites (default: estimate)" << endl
             << "  -mh                  Computing site-specific rates to .mhrate file using" << endl
             << "                       Meyer & von Haeseler (2003) method" << endl
diff --git a/tools.h b/tools.h
index ef24f80..21a2462 100644
--- a/tools.h
+++ b/tools.h
@@ -439,6 +439,12 @@ public:
 	bool testAlpha;
 
     /**
+     *  Restart the optimization of alpha and pinvar from different starting
+     *  pinv values (supercedes the option testAlpha
+     */
+    bool test_param;
+
+    /**
      *  Automatic adjust the log-likelihood espilon using some heuristic
      */
     bool testAlphaEpsAdaptive;
@@ -1685,7 +1691,7 @@ public:
     /** true to ignore checkpoint file */
     bool ignore_checkpoint;
     /** number of quartets for likelihood mapping */
-    int lmap_num_quartets;
+    int64_t lmap_num_quartets;
 
     /**
             file containing the cluster information for clustered likelihood mapping
@@ -1903,6 +1909,21 @@ int convert_int(const char *str, int &end_pos) throw (string);
 void convert_int_vec(const char *str, IntVector &vec) throw (string);
 
 /**
+        convert string to int64_t, with error checking
+        @param str original string
+        @return the number
+ */
+int64_t convert_int64(const char *str) throw (string);
+
+/**
+        convert string to int64_t, with error checking
+        @param str original string
+        @param end_pos end position
+        @return the number
+ */
+int64_t convert_int64(const char *str, int64_t &end_pos) throw (string);
+
+/**
         convert string to double, with error checking
         @param str original string
         @return the double

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/iqtree.git



More information about the debian-med-commit mailing list