[med-svn] [raxml] 01/03: Imported Upstream version 8.2.9+dfsg

Andreas Tille tille at debian.org
Tue Aug 16 12:33:00 UTC 2016


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository raxml.

commit f2c430abe2bdfe085987898666ffe7560bb2c7d5
Author: Andreas Tille <tille at debian.org>
Date:   Tue Aug 16 14:08:05 2016 +0200

    Imported Upstream version 8.2.9+dfsg
---
 README               |   2 +-
 axml.c               | 243 ++++++++++++++++++++++++++++++++++++++++++---------
 axml.h               |   7 +-
 manual/NewManual.odt | Bin 136750 -> 136886 bytes
 manual/NewManual.pdf | Bin 633998 -> 634532 bytes
 5 files changed, 207 insertions(+), 45 deletions(-)

diff --git a/README b/README
index 1befc7e..46561bc 100644
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-Standard RAxML version 8.2.8
+Standard RAxML version 8.2.9
 
 ============================================================================================================
 
diff --git a/axml.c b/axml.c
index 1927e39..7a66fba 100644
--- a/axml.c
+++ b/axml.c
@@ -41,6 +41,7 @@
 #include <unistd.h>
 #endif
 
+#include <time.h>
 #include <math.h>
 #include <time.h>
 #include <stdlib.h>
@@ -128,6 +129,29 @@ FILE *getNumberOfTrees(tree *tr, char *fileName, analdef *adef)
   return f;
 }
 
+static void checkStdoutFlush(void)
+{
+  /* If stdout is redirected, other processes monitoring RAxML's output
+     (e.g., via tail, or a pipe) do not receive any standard output until
+     stdio gets around to flushing the file, which may be a long time.
+     To provide more continuous feeding of RAxML output to these processes,
+     we force a flush of the stdout stream once per second.
+     (Dave Swofford 16july2016)
+  */
+  
+  static clock_t 
+    lastFlush;
+  
+  clock_t
+    now = clock();
+  
+  if(now - lastFlush > CLOCKS_PER_SEC)
+    {
+      fflush(stdout);
+      lastFlush = now;
+    }
+}
+
 static void printBoth(FILE *f, const char* format, ... )
 {
   va_list args;
@@ -138,6 +162,7 @@ static void printBoth(FILE *f, const char* format, ... )
   va_start(args, format);
   vprintf(format, args );
   va_end(args);
+  checkStdoutFlush();
 }
 
 void printBothOpen(const char* format, ... )
@@ -156,6 +181,7 @@ void printBothOpen(const char* format, ... )
       va_start(args, format);
       vprintf(format, args );
       va_end(args);
+      checkStdoutFlush();
       
       fclose(f);
     }     
@@ -177,6 +203,7 @@ void printBothOpenMPI(const char* format, ... )
       va_start(args, format);
       vprintf(format, args );
       va_end(args);
+      checkStdoutFlush();
       
       fclose(f);
     }
@@ -2995,6 +3022,11 @@ static void checkSequences(tree *tr, rawdata *rdta, analdef *adef)
 	    }
 
 
+	  if(adef->printIdenticalSequences == TRUE)
+	    {
+	      count = 0;
+	    }
+
 	  if(!filexists(noDupFile))
 	    {
 	      FILE 
@@ -3003,37 +3035,44 @@ static void checkSequences(tree *tr, rawdata *rdta, analdef *adef)
 	      if(adef->silent && (count || countUndeterminedColumns))
 		printBothOpen("\nIMPORTANT WARNING: Alignment validation warnings have been suppressed. Found %d duplicate %s and %d undetermined %s\n\n", 
 			      count, count > 1 ? "sequences" : "sequence", countUndeterminedColumns, countUndeterminedColumns > 1 ? "columns" : "column");
+	      
+	      //if(adef->printIdenticalSequences)
+	      //	count = 0;
  	      
-	      printBothOpen("Just in case you might need it, an alignment file with \n");
-	      if(count && !countUndeterminedColumns)
-		printBothOpen("sequence duplicates removed is printed to file %s\n", noDupFile);
-	      if(!count && countUndeterminedColumns)
-		printBothOpen("undetermined columns removed is printed to file %s\n", noDupFile);
-	      if(count && countUndeterminedColumns)
-		printBothOpen("sequence duplicates and undetermined columns removed is printed to file %s\n", noDupFile);
+	      if(count > 0 || countUndeterminedColumns > 0)
+		{
+		  printBothOpen("Just in case you might need it, an alignment file with \n");	     
+		
+		  if(count && !countUndeterminedColumns)
+		    printBothOpen("sequence duplicates removed is printed to file %s\n", noDupFile);
+		  if(!count && countUndeterminedColumns)
+		    printBothOpen("undetermined columns removed is printed to file %s\n", noDupFile);
+		  if(count && countUndeterminedColumns)
+		    printBothOpen("sequence duplicates and undetermined columns removed is printed to file %s\n", noDupFile);
 
-	      newFile = myfopen(noDupFile, "wb");
+		  newFile = myfopen(noDupFile, "wb");
 
-	      fprintf(newFile, "%d %d\n", tr->mxtips - count, rdta->sites - countUndeterminedColumns);
+		  fprintf(newFile, "%d %d\n", tr->mxtips - count, rdta->sites - countUndeterminedColumns);
 
-	      for(i = 1; i < n; i++)
-		{
-		  if(!omissionList[i])
+		  for(i = 1; i < n; i++)
 		    {
-		      fprintf(newFile, "%s ", tr->nameList[i]);
-		      tipI =  &(rdta->y[i][1]);
-
-		      for(j = 0; j < rdta->sites; j++)
+		      if(!omissionList[i] || count == 0)
 			{
-			  if(undeterminedList[j + 1] == 0)			    
-			    fprintf(newFile, "%c", getInverseMeaning(tr->dataVector[j + 1], tipI[j]));			      			     			 
+			  fprintf(newFile, "%s ", tr->nameList[i]);
+			  tipI =  &(rdta->y[i][1]);
+			  
+			  for(j = 0; j < rdta->sites; j++)
+			    {
+			      if(undeterminedList[j + 1] == 0)			    
+				fprintf(newFile, "%c", getInverseMeaning(tr->dataVector[j + 1], tipI[j]));			      			     			 
+			    }
+			  
+			  fprintf(newFile, "\n");
 			}
-
-		      fprintf(newFile, "\n");
 		    }
-		}
 
-	      fclose(newFile);
+		  fclose(newFile);
+		}
 	    }
 	  else
 	    {
@@ -3713,6 +3752,7 @@ static void initAdef(analdef *adef)
   adef->bootstopPermutations = 100;
   adef->fcThreshold = 99;
   adef->sampleQuartetsWithoutReplacement = FALSE;
+  adef->printIdenticalSequences = FALSE; 
 }
 
 static int modelExists(char *model, analdef *adef)
@@ -4993,6 +5033,12 @@ static void printREADME(void)
 #endif
   printf("      [--bootstop-perms=number]\n");
   printf("\n");
+  printf("      [--quartets-without-replacement]\n");
+  printf("\n");
+  printf("      [---without-replacement]\n");
+  printf("\n");
+  printf("      [--print-identical-sequences]\n");
+  printf("\n");      
   printf("      -a      Specify a column weight file name to assign individual weights to each column of \n");
   printf("              the alignment. Those weights must be integers separated by any type and number \n");
   printf("              of whitespaces whithin a separate file, see file \"example_weights\" for an example.\n");
@@ -5254,7 +5300,7 @@ static void printREADME(void)
   printf("\n");
   printf("      -n      Specifies the name of the output file.\n");
   printf("\n");
-  printf("      -o      Specify the name of a single outgrpoup or a comma-separated list of outgroups, eg \"-o Rat\" \n");
+  printf("      -o      Specify the name of a single outgroup or a comma-separated list of outgroups, eg \"-o Rat\" \n");
   printf("              or \"-o Rat,Mouse\", in case that multiple outgroups are not monophyletic the first name \n");
   printf("              in the list will be selected as outgroup, don't leave spaces between taxon names!\n"); 
   printf("\n");
@@ -5445,6 +5491,11 @@ static void printREADME(void)
    printf("\n");
   printf("                  DEFAULT: random sampling with replacements\n"); 
   printf("\n");
+  printf("      --print-identical-sequences specify that RAxML shall automatically generate a .reduced alignment with all\n");
+  printf("                  undetermined columns removed, but without removing exactly identical sequences\n");
+  printf("\n");
+  printf("                  DEFAULT: identical sequences will also be removed in the .reduced file\n"); 
+  printf("\n");
   printf("\n\n\n\n");
 
 }
@@ -5594,7 +5645,7 @@ static void get_args(int argc, char *argv[], analdef *adef, tree *tr)
   while(1)
     {      
       static struct 
-	option long_options[17] =
+	option long_options[18] =
 	{	 
 	  {"mesquite",                  no_argument,       &flag, 1},
 	  {"silent",                    no_argument,       &flag, 1},
@@ -5612,6 +5663,7 @@ static void get_args(int argc, char *argv[], analdef *adef, tree *tr)
 	  {"set-thread-affinity",       no_argument,       &flag, 1},
 	  {"bootstop-perms",            required_argument, &flag, 1},
 	  {"quartets-without-replacement", no_argument,    &flag, 1},
+	  {"print-identical-sequences", no_argument,       &flag, 1},
 	  {0, 0, 0, 0}
 	};
       
@@ -5819,6 +5871,9 @@ static void get_args(int argc, char *argv[], analdef *adef, tree *tr)
 	    case 15:
 	      adef->sampleQuartetsWithoutReplacement = TRUE;
 	      break;
+	    case 16:
+	      adef->printIdenticalSequences = TRUE;
+	      break;
 	    default:
 	      if(flagCheck)
 		{
@@ -11511,16 +11566,40 @@ unsigned int precomputed16_bitcount (unsigned int n)
 
 /*** functions by Sarah for drawing quartets without replacement ***/
 
+/*
+Given the following nested for-loops:
+for b = a+1 to n-2
+  for c = b+1 to n-1
+    for d = c+1 to n
+How many iterations do we have for a given a?
+*/
 static uint64_t f2(int n, int a) 
 {
-  return ((n - a) * (n - 1 - a) * (n - 2 - a ) / 6);
+  long double nDouble = n;
+  long double aDouble = a;
+  long double res = (nDouble - aDouble) * (nDouble - 1 - aDouble) * (nDouble - 2 - aDouble) / 6;
+  return round(res);
 };
 
+/*
+Given the following nested for-loops:
+for c = b+1 to n-1
+  for d = c+1 to n
+How many iterations do we have for a given b?
+*/
 static uint64_t f3(int n, int b) 
 {
-  return ((n - b) * (n - 1 - b) / 2);
+  long double nDouble = n;
+  long double bDouble = b;
+  long double res = (nDouble - bDouble) * (nDouble - 1 - bDouble) / 2;
+  return round(res);
 };
 
+/*
+Given the following for-loop:
+for d = c+1 to n
+How many iterations do we have for a given c?
+*/
 static uint64_t f4(int n, int c) 
 {
   return (n-c);
@@ -11532,8 +11611,18 @@ static void preprocessQuartetPrefix(int numberOfTaxa, uint64_t *prefixSumF2, uin
     i,
     n = numberOfTaxa;
   
-  
-
+  /*
+  Given the following nested for-loops:
+  it = 0;
+  for a = 1 to n-3
+    for b = a+1 to n-2
+      for c = b+1 to n-1
+        for d = c+1 to n
+          it++;
+  prefixSumF2[i]: first value of it that belongs to a = i+1 
+  prefixSumF3[i]: first value of it that belongs to b = i+2
+  prefixSumF4[i]: first value of it that belongs to c = i+3
+  */
   prefixSumF2[0] = 1;
   prefixSumF3[0] = 1;
   prefixSumF4[0] = 1;
@@ -11546,20 +11635,23 @@ static void preprocessQuartetPrefix(int numberOfTaxa, uint64_t *prefixSumF2, uin
   }
 }
 
+/*
+Binary search in sorted array of size n-2. Returns the index of the greatest value in array that is <= z.
+*/
 static unsigned int binarySearch(uint64_t* array, uint64_t z, int n)
 {
   unsigned int 
     first = 0,
     last = n-3,
     middle = (first + last) / 2, 
-    lastSmaller = 0;
+    lastSmallerOrEqual = 0;
   
   while(first <= last)
     {
       if(array[middle] < z)
 	{
 	  first = middle + 1;
-	  lastSmaller = middle;
+	  lastSmallerOrEqual = middle;
 	}
       else 
 	{
@@ -11568,7 +11660,7 @@ static unsigned int binarySearch(uint64_t* array, uint64_t z, int n)
 	  else 
 	    { 
 	      // array[middle] == z
-	      lastSmaller = middle;
+	      lastSmallerOrEqual = middle;
 	      break;
 	    }
 	}
@@ -11576,14 +11668,32 @@ static unsigned int binarySearch(uint64_t* array, uint64_t z, int n)
       middle = (first + last)/2;
     }
 
-  return lastSmaller;
+  return lastSmallerOrEqual;
 }
 
+/**
+Map an integer value z to a quartet (t1,t2,t3,t4).
+
+ at param numberOfTaxa The number of taxa in the tree.
+ at param z A value encoding a quartet (t1,t2,t3,t4). 
+*/
 static void mapNumberToQuartet(int numberOfTaxa, uint64_t z, int *t1, int *t2, int *t3, int *t4, uint64_t *prefixSumF2, uint64_t *prefixSumF3, uint64_t *prefixSumF4)
 {
+  /*
+  Given the following nested for-loops:
+  z = 0;
+  for t1 = 1 to numberOfTaxa-3
+    for t2 = t1+1 to numberOfTaxa-2
+      for t3 = t2+1 to numberOfTaxa-1
+        for t4 = t3+1 to numberOfTaxa
+          z++;
+  Find the quartet (t1,t2,t3,t4) that belongs to the given value of z.
+  */
+  
   uint64_t    
     wantedT1 = z;
 
+  // find the first value of z that belongs to t1
   *t1 = binarySearch(prefixSumF2, z, numberOfTaxa) + 1;
 
   uint64_t 
@@ -11600,6 +11710,7 @@ static void mapNumberToQuartet(int numberOfTaxa, uint64_t z, int *t1, int *t2, i
   uint64_t 
     wantedT2 = (prefixSumF3[*t1 - 1]) + (wantedT1 - foundT1);
   
+  // find the first value of z that belongs to t2
   *t2 = binarySearch(prefixSumF3, wantedT2, numberOfTaxa) + 2;
 
   uint64_t 
@@ -11615,6 +11726,7 @@ static void mapNumberToQuartet(int numberOfTaxa, uint64_t z, int *t1, int *t2, i
   uint64_t 
     wantedT3 = (prefixSumF4[*t2 - 2]) + (wantedT2 - foundT2);
   
+  // find the first value of z that belongs to t3
   *t3 = binarySearch(prefixSumF4, wantedT3, numberOfTaxa) + 3;
 
   uint64_t 
@@ -11626,6 +11738,7 @@ static void mapNumberToQuartet(int numberOfTaxa, uint64_t z, int *t1, int *t2, i
       return;
     }
 
+  // find the value of z that belongs to t4
   *t4 = wantedT3 - foundT3 + *t3 + 1;
 }
 
@@ -11975,6 +12088,23 @@ static void computeAllThreeQuartets(tree *tr, nodeptr q1, nodeptr q2, int t1, in
 #define RANDOM_QUARTETS 1
 #define GROUPED_QUARTETS 2
 
+/**
+Sample random quartets in ascending order using the methodA algorithm from J. S. Vitter, "An efficient algorithm for sequential random sampling". The runtime of this algorithm is O(numberOfQuartets).
+
+ at param tr The tree.
+ at param numberOfTaxa The number of taxa in the tree.
+ at param seed
+ at param numberOfQuartets The total number of different quartets that exist for numberOfTaxa taxa.
+ at param randomQuartets The number of quartets to sample.
+ at param q1
+ at param q2
+ at param prefixSumF2
+ at param prefixSumF3
+ at param prefixSumF4
+ at param f
+ at param adef
+ at param actVal The value of the last drawn random number representing a quartet.
+*/
 static void sampleQuartetsWithoutReplacementA(tree *tr, int numberOfTaxa, int64_t seed, uint64_t numberOfQuartets, uint64_t randomQuartets, nodeptr q1, nodeptr q2, uint64_t *prefixSumF2, uint64_t *prefixSumF3, uint64_t *prefixSumF4, FILE *f, analdef *adef, uint64_t actVal)
 {
   int64_t 
@@ -12026,14 +12156,35 @@ static void sampleQuartetsWithoutReplacementA(tree *tr, int numberOfTaxa, int64_
   actVal += s+1;
   
   mapNumberToQuartet(numberOfTaxa, actVal, &t1, &t2, &t3, &t4, prefixSumF2, prefixSumF3, prefixSumF4);
+  #ifdef _QUARTET_MPI
+				  //MPI version very simple and naive way to determine which processor 
+				  //is going to do the likelihood calculations for this quartet
+				  if((quartetCounter % (uint64_t)(processes - 1)) == (uint64_t)(processID - 1))
+#endif
   computeAllThreeQuartets(tr, q1, q2, t1, t2, t3, t4, f, adef);
   quartetCounter++;
 
   assert(quartetCounter == randomQuartets);
 }
 
-static void sampleQuartetsWithoutReplacementD(tree *tr, int numberOfTaxa, int64_t seed, uint64_t numberOfQuartets, uint64_t randomQuartets, nodeptr q1, nodeptr q2, 
-					      uint64_t *prefixSumF2, uint64_t *prefixSumF3, uint64_t *prefixSumF4, FILE *f, analdef *adef, uint64_t actVal)
+/**
+Sample random quartets in ascending order using the methodD algorithm from J. S. Vitter, "An efficient algorithm for sequential random sampling". The runtime of this algorithm is O(randomQuartets). The main idea of the algorithm is to decide ho many quartets to skip instead of testing for each quartet whether to take it or not.
+
+ at param tr The tree.
+ at param numberOfTaxa The number of taxa in the tree.
+ at param seed
+ at param numberOfQuartets The total number of different quartets that exist for numberOfTaxa taxa.
+ at param randomQuartets The number of quartets to sample.
+ at param q1
+ at param q2
+ at param prefixSumF2
+ at param prefixSumF3
+ at param prefixSumF4
+ at param f
+ at param adef
+ at param actVal The value of the last drawn random number representing a quartet.
+*/
+static void sampleQuartetsWithoutReplacementD(tree *tr, int numberOfTaxa, int64_t seed, uint64_t numberOfQuartets, uint64_t randomQuartets, nodeptr q1, nodeptr q2, uint64_t *prefixSumF2, uint64_t *prefixSumF3, uint64_t *prefixSumF4, FILE *f, analdef *adef, uint64_t actVal)
 {
   int64_t       
     myseed = seed;
@@ -12148,6 +12299,11 @@ static void sampleQuartetsWithoutReplacementD(tree *tr, int numberOfTaxa, int64_
       // Skip over the next s records and select the following one for the sample
       actVal += s+1;
       mapNumberToQuartet(numberOfTaxa, actVal, &t1, &t2, &t3, &t4, prefixSumF2, prefixSumF3, prefixSumF4);
+      #ifdef _QUARTET_MPI
+				  //MPI version very simple and naive way to determine which processor 
+				  //is going to do the likelihood calculations for this quartet
+				  if((quartetCounter % (uint64_t)(processes - 1)) == (uint64_t)(processID - 1))
+#endif
       computeAllThreeQuartets(tr, q1, q2, t1, t2, t3, t4, f, adef);
       quartetCounter++;
       assert(quartetCounter == randomQuartets);
@@ -12373,7 +12529,11 @@ static void computeQuartets(tree *tr, analdef *adef, rawdata *rdta, cruncheddata
 	  break;
 	case RANDOM_QUARTETS:
 	  {	 
-	    //code contributed by Sarah for drawing quartets without replacement :-) 
+	    //code contributed by Sarah for drawing quartets without replacement :-)
+        // Sample random quartets without replacement in O(randomQuartets * log(tr->mxtips)) time and O(tr->mxtips) space.
+        // This is achieved by drawing random numbers in ascending order and using prefix sums to map a number to a
+        // quartet (t1,t2,t3,t4) using the lexicographical ordering of the quartets. For each quartet, it is required
+        // that 1 <= t1 < t2 < t3 < t4 <= tr->mxtips.
 	    
 	    if(adef->sampleQuartetsWithoutReplacement)
 	      {
@@ -12383,11 +12543,11 @@ static void computeQuartets(tree *tr, analdef *adef, rawdata *rdta, cruncheddata
 		  *prefixSumF4 = (uint64_t*)rax_malloc(sizeof(uint64_t) * (size_t)(tr->mxtips - 2));
 
 		preprocessQuartetPrefix(tr->mxtips, prefixSumF2, prefixSumF3, prefixSumF4);
-		
-		if(randomQuartets >= numberOfQuartets / 13)		
-		  sampleQuartetsWithoutReplacementA(tr, tr->mxtips, adef->parsimonySeed, numberOfQuartets, randomQuartets, q1, q2, prefixSumF2, prefixSumF3, prefixSumF4, f, adef, 0);		
-		else		
-		  sampleQuartetsWithoutReplacementD(tr, tr->mxtips, adef->parsimonySeed, numberOfQuartets, randomQuartets, q1, q2, prefixSumF2, prefixSumF3, prefixSumF4, f, adef, 0);	       
+
+		if (randomQuartets >= numberOfQuartets/13) // decide for each quartet whether to take it or not
+		  sampleQuartetsWithoutReplacementA(tr, tr->mxtips, adef->parsimonySeed, numberOfQuartets, randomQuartets, q1, q2, prefixSumF2, prefixSumF3, prefixSumF4, f, adef, 0);
+		else // decide how many quartets to skip before taking the next one
+		  sampleQuartetsWithoutReplacementD(tr, tr->mxtips, adef->parsimonySeed, numberOfQuartets, randomQuartets, q1, q2, prefixSumF2, prefixSumF3, prefixSumF4, f, adef, 0);
 
 		rax_free(prefixSumF2);
 		rax_free(prefixSumF3);
@@ -12423,7 +12583,7 @@ static void computeQuartets(tree *tr, analdef *adef, rawdata *rdta, cruncheddata
 				{
 #ifdef _QUARTET_MPI
 				  //MPI version very simple and naive way to determine which processor 
-				  //is goingt to do the likelihood calculations for this quartet
+				  //is going to do the likelihood calculations for this quartet
 				  if((quartetCounter % (uint64_t)(processes - 1)) == (uint64_t)(processID - 1))
 #endif
 				    //function that computes the likelihood for all three possible unrooted trees 
@@ -13069,6 +13229,7 @@ static void rootTree(tree *tr, analdef *adef)
   distancesNewview(tr->start->back, distances, tr, &rootBranch, &minimum);
 
   printTree(rootBranch, tr, distances, f, printBranchLabels);
+  fprintf(f, "\n");
   
   fclose(f);
 
diff --git a/axml.h b/axml.h
index 58aeb8a..b5e4799 100644
--- a/axml.h
+++ b/axml.h
@@ -168,9 +168,9 @@
 #define PointGamma(prob,alpha,beta)  PointChi2(prob,2.0*(alpha))/(2.0*(beta))
 
 #define programName        "RAxML"
-#define programVersion     "8.2.8"
-#define programVersionInt   8280
-#define programDate        "March 23 2016"
+#define programVersion     "8.2.9"
+#define programVersionInt   8290
+#define programDate        "July 20 2016"
 
 
 #define  TREE_EVALUATION                 0
@@ -1176,6 +1176,7 @@ typedef  struct {
   int           bootstopPermutations;
   int           fcThreshold; 
   boolean       sampleQuartetsWithoutReplacement;
+  boolean       printIdenticalSequences;
 } analdef;
 
 
diff --git a/manual/NewManual.odt b/manual/NewManual.odt
index ac948ca..e3ed9fe 100644
Binary files a/manual/NewManual.odt and b/manual/NewManual.odt differ
diff --git a/manual/NewManual.pdf b/manual/NewManual.pdf
index 5212877..81214d7 100644
Binary files a/manual/NewManual.pdf and b/manual/NewManual.pdf differ

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/raxml.git



More information about the debian-med-commit mailing list