[med-svn] [gubbins] 01/03: Imported Upstream version 2.1.0

Andreas Tille tille at debian.org
Fri Aug 5 21:53:16 UTC 2016


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository gubbins.

commit 63f5943e24e8a4818a457721343347e0ceaea01d
Author: Andreas Tille <tille at debian.org>
Date:   Fri Aug 5 23:46:56 2016 +0200

    Imported Upstream version 2.1.0
---
 CHANGELOG                                          |  5 ++
 INSTALL.md                                         |  2 +
 VERSION                                            |  2 +-
 python/gubbins/RAxMLExecutable.py                  | 12 +++-
 python/gubbins/common.py                           |  2 +-
 .../input_alignment.fasta                          |  2 +-
 python/gubbins/tests/test_external_dependancies.py |  1 +
 python/scripts/run_gubbins.py                      |  2 +
 src/branch_sequences.c                             | 83 +++++++---------------
 src/branch_sequences.h                             |  1 -
 src/string_cat.c                                   |  9 +--
 tests/check_branch_sequences.c                     | 18 ++---
 12 files changed, 53 insertions(+), 86 deletions(-)

diff --git a/CHANGELOG b/CHANGELOG
index 69f2409..223b398 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,8 @@
+v2.1.0 - 22 July 2016
+------
+Use GTRCAT model by default in RAxML instead of GTRGAMMA (massive speedup).
+C code optimisations in Gubbins.
+
 v2.0.0 - 26 May 2016
 ------
 Reconstruct internal sequences by default using RAxML rather than fastML. 
diff --git a/INSTALL.md b/INSTALL.md
index 41e524f..f3de67b 100644
--- a/INSTALL.md
+++ b/INSTALL.md
@@ -97,6 +97,8 @@ autoreconf -i
 ./configure
 make
 sudo make install
+cd python
+sudo python3 setup.py install
 ```
 
 ## OSX/Linux/Windows - Virtual Machine
diff --git a/VERSION b/VERSION
index 227cea2..7ec1d6d 100644
--- a/VERSION
+++ b/VERSION
@@ -1 +1 @@
-2.0.0
+2.1.0
diff --git a/python/gubbins/RAxMLExecutable.py b/python/gubbins/RAxMLExecutable.py
index a27188b..2401a32 100644
--- a/python/gubbins/RAxMLExecutable.py
+++ b/python/gubbins/RAxMLExecutable.py
@@ -23,18 +23,24 @@ import subprocess
 import re
 
 class RAxMLExecutable(object):
-	def __init__(self, threads,  verbose = False ):
+	def __init__(self, threads, model = 'GTRCAT', verbose = False ):
 		self.verbose = verbose
 		self.threads = threads
 		self.single_threaded_executables = ['raxmlHPC-AVX','raxmlHPC-SSE3','raxmlHPC']
 		self.multi_threaded_executables = ['raxmlHPC-PTHREADS-AVX','raxmlHPC-PTHREADS-SSE3','raxmlHPC-PTHREADS']
+		self.model = model
 		
 		self.raxml_executable = self.select_executable_based_on_threads()
-		self.tree_building_parameters = ' -f d -p 1 -m GTRGAMMA '
+		self.tree_building_parameters_gtrgamma = ' -f d -p 1 -m GTRGAMMA '
+		self.tree_building_parameters_gtrcat = ' -f d -p 1 -m GTRCAT -V '
 		self.internal_sequence_parameters = ' -f A -p 1 -m GTRGAMMA '
 		
 	def tree_building_command(self):
-		command = self.raxml_executable + self.threads_parameter() + self.tree_building_parameters
+		tree_building_parameters = self.tree_building_parameters_gtrcat
+		if self.model == 'GTRGAMMA':
+			tree_building_parameters =self.tree_building_parameters_gtrgamma
+		
+		command = self.raxml_executable + self.threads_parameter() + tree_building_parameters
 		if self.verbose:
 			print("Tree building command: "+command)
 		return command
diff --git a/python/gubbins/common.py b/python/gubbins/common.py
index e1ae21d..b1f2e4e 100644
--- a/python/gubbins/common.py
+++ b/python/gubbins/common.py
@@ -102,7 +102,7 @@ class GubbinsCommon():
 
   def parse_and_run(self):
     # Default parameters
-    raxml_executable_obj = RAxMLExecutable(self.args.threads, self.args.verbose)
+    raxml_executable_obj = RAxMLExecutable(self.args.threads, self.args.raxml_model, self.args.verbose)
     
     fasttree_executables = ['FastTree','fasttree']
     FASTTREE_EXEC = GubbinsCommon.choose_executable(fasttree_executables)
diff --git a/python/gubbins/tests/data/raxml_sequence_reconstruction/input_alignment.fasta b/python/gubbins/tests/data/raxml_sequence_reconstruction/input_alignment.fasta
index 6d75ec7..2367ec0 100644
--- a/python/gubbins/tests/data/raxml_sequence_reconstruction/input_alignment.fasta
+++ b/python/gubbins/tests/data/raxml_sequence_reconstruction/input_alignment.fasta
@@ -9,4 +9,4 @@ CCCTT
 >E
 CCTTT
 >F
-CCGGG
\ No newline at end of file
+CCGGG
diff --git a/python/gubbins/tests/test_external_dependancies.py b/python/gubbins/tests/test_external_dependancies.py
index ef03d04..6066171 100644
--- a/python/gubbins/tests/test_external_dependancies.py
+++ b/python/gubbins/tests/test_external_dependancies.py
@@ -212,6 +212,7 @@ class TestExternalDependancies(unittest.TestCase):
       parser.add_argument('--threads',          '-c', help='Number of threads to run with RAXML, but only if a PTHREADS version is available', type=int,  default = 1)
       parser.add_argument('--converge_method',  '-z', help='Criteria to use to know when to halt iterations [weighted_robinson_foulds|robinson_foulds|recombination]',  default = 'weighted_robinson_foulds')
       parser.add_argument('--version',                action='version', version=str(pkg_resources.get_distribution("gubbins").version))
+      parser.add_argument('--raxml_model',      '-r', help='RAxML model [GTRGAMMA|GTRCAT], default GTRCAT',  default = 'GTRCAT')
       return parser
       
   def default_arg_parse(self):
diff --git a/python/scripts/run_gubbins.py b/python/scripts/run_gubbins.py
index 592a64d..35fc07c 100755
--- a/python/scripts/run_gubbins.py
+++ b/python/scripts/run_gubbins.py
@@ -21,6 +21,7 @@
 
 import sys
 sys.path.append(".")
+sys.path.append("..")
 import argparse
 import pkg_resources
 from gubbins import common
@@ -44,6 +45,7 @@ parser.add_argument('--converge_method',  '-z', help='Criteria to use to know wh
 parser.add_argument('--version',                action='version', version=str(pkg_resources.get_distribution("gubbins").version))
 parser.add_argument('--min_window_size',  '-a', help='Minimum window size, default 100', type=int,  default = 100)
 parser.add_argument('--max_window_size',  '-b', help='Maximum window size, default 10000', type=int,  default = 10000)
+parser.add_argument('--raxml_model',      '-r', help='RAxML model [GTRGAMMA|GTRCAT], default GTRCAT',  default = 'GTRCAT')
 
 gubbins_runner  = common.GubbinsCommon(parser.parse_args())
 gubbins_runner.parse_and_run()
diff --git a/src/branch_sequences.c b/src/branch_sequences.c
index fcdbb2b..a7989e1 100644
--- a/src/branch_sequences.c
+++ b/src/branch_sequences.c
@@ -78,37 +78,31 @@ int get_list_of_snp_indices_which_fall_in_downstream_recombinations(int ** curre
 {
 	int num_snps_in_recombinations =0;
 	int i = 0;
+  
+  // loop over each block
 	for(i = 0; i<num_blocks; i++ )
 	{
 		int current_index = 0;
+    // convert the starting coordinates of block to the nearest SNP index
 		current_index = find_starting_index(current_block_coordinates[0][i],snp_locations,0, number_of_snps);
 		
-		int j;
-		for(j = current_index; (j < number_of_snps && snp_locations[j] <= current_block_coordinates[1][i]); j++)
+    //make sure that the index begins at start of block
+		int beginning_j = current_index;
+    for(beginning_j = current_index; snp_locations[beginning_j] < current_block_coordinates[0][i];beginning_j++)
+    {
+    }
+    
+    int j;
+    // starting at the beginning index of block, count all the snps until the end of the block.
+		for(j = beginning_j; (j < number_of_snps && snp_locations[j] <= current_block_coordinates[1][i]); j++)
 		{
-			if(snp_locations[j] >= current_block_coordinates[0][i] && snp_locations[j] <= current_block_coordinates[1][i])
-			{
-				int k = 0;
-				int seen_before = 0;
-				// has this snp index been flagged before?
-				for(k =0; k < num_snps_in_recombinations; k++)
-				{
-					if(snps_in_recombinations[k] == j)
-					{
-						seen_before = 1;
-						break;
-					}
-				}
-				if(seen_before == 0)
-				{
-					snps_in_recombinations[num_snps_in_recombinations] = j;
-					num_snps_in_recombinations++;
-    			}
-			}
+				snps_in_recombinations[num_snps_in_recombinations] = j;
+				num_snps_in_recombinations++;
 		}
 	}
+
+  // may contain duplications
 	return num_snps_in_recombinations;
-		
 }
 
 
@@ -573,11 +567,6 @@ int get_blocks(int ** block_coordinates, int genome_size,int * snp_site_coords,i
 	// Set up the window counter with 1 value per base in the branch
  	int * window_count;
 	window_count = (int *) calloc((genome_size+1),sizeof(int));
-	int i;
-	for(i =0; i< genome_size; i++)
-	{
-		window_count[i] = 0;
-	}
 	
 	// Integer array with location of gaps
 	int * gaps_in_original_genome_space;
@@ -626,7 +615,7 @@ int get_blocks(int ** block_coordinates, int genome_size,int * snp_site_coords,i
 	int in_block = 0;
 	int block_lower_bound = 0;
 	// Scan across the pileup and record where blocks are above the cutoff
-	
+	int i;
 	for(i = 0; i < genome_size; i++)
 	{
 		// Just entered the start of a block
@@ -1043,12 +1032,14 @@ int calculate_genome_length_excluding_blocks_and_gaps(char * sequence, int lengt
 	int * bases_to_be_excluded;  
 	bases_to_be_excluded = (int*) calloc((length_of_sequence + 1),sizeof(int));
 	
+  int genome_length = length_of_sequence;
 	int i = 0;
 	for(i = 0; i<length_of_sequence; i++)
 	{
 		if(sequence[i] == 'N' || sequence[i] == '-' )
 		{
 			bases_to_be_excluded[i] = 1;
+      genome_length--;
 		}
 	}
 	
@@ -1064,41 +1055,15 @@ int calculate_genome_length_excluding_blocks_and_gaps(char * sequence, int lengt
 		int block_index = 0;
 		for(block_index = block_coordinates[0][j]; block_index <= block_coordinates[1][j]; block_index++ )
 		{
-			bases_to_be_excluded[block_index-1] = 1;
+      if(bases_to_be_excluded[block_index-1] == 0)
+      {
+        bases_to_be_excluded[block_index-1] = 1;
+        genome_length--;
+      }
 		}
 	}
 	
-    int genome_length = 0;
-	for(i = 0; i<length_of_sequence; i++)
-	{
-		if(bases_to_be_excluded[i] == 0 )
-		{
-			genome_length++;
-		}
-	}
 	return genome_length;
 }
 
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/src/branch_sequences.h b/src/branch_sequences.h
index 64a3591..c8f6847 100644
--- a/src/branch_sequences.h
+++ b/src/branch_sequences.h
@@ -47,7 +47,6 @@ int get_list_of_snp_indices_which_fall_in_downstream_recombinations(int ** curre
 
 int calculate_genome_length_excluding_blocks_and_gaps(char * sequence, int length_of_sequence, int ** block_coordinates, int num_blocks);
 
-
 #define WINDOW_SNP_MODE_TARGET 10
 #define RANDOMNESS_DAMPNER 0.05
 #define MAX_SAMPLE_NAME_SIZE 1024
diff --git a/src/string_cat.c b/src/string_cat.c
index 50b3df3..17f3d5b 100644
--- a/src/string_cat.c
+++ b/src/string_cat.c
@@ -21,16 +21,9 @@
 #include <stdlib.h>
 #include <string.h>
 
-
 int size_of_string(char *input_string)
 {
-	int i = 0;
-
-	while( input_string[i] != '\0')
-	{
-		i++;
-	}
-	return i;
+  return strlen(input_string);
 }
 
 void concat_strings_created_with_malloc(char *input_string, char *string_to_concat)
diff --git a/tests/check_branch_sequences.c b/tests/check_branch_sequences.c
index b6f8c90..78b9eca 100644
--- a/tests/check_branch_sequences.c
+++ b/tests/check_branch_sequences.c
@@ -153,22 +153,19 @@ START_TEST (check_get_list_of_snp_indices_which_fall_in_downstream_recombination
 {
 	int ** block_coords;  
 	block_coords  = (int **) malloc(2*sizeof(int*));
-	block_coords[0] = (int*) malloc((4)*sizeof(int ));
-	block_coords[1] = (int*) malloc((4)*sizeof(int ));
+	block_coords[0] = (int*) malloc((2)*sizeof(int ));
+	block_coords[1] = (int*) malloc((2)*sizeof(int ));
 	block_coords[0][0] = 5;
 	block_coords[1][0] = 10;
 	block_coords[0][1] = 30;
 	block_coords[1][1] = 35;
-	block_coords[0][2] = 20;
-	block_coords[1][2] = 25;
-	block_coords[0][3] = 7;
-	block_coords[1][3] = 15;
+
 	int snp_locations[16] = {1,4,5,6,7,10,11,15,19,20,29,30,35,36,40,50};
 	
-    int * snps_in_recombinations = (int *) calloc((16 +1),sizeof(int));
+  int * snps_in_recombinations = (int *) calloc((16 +1),sizeof(int));
 	int num_snps_in_recombinations = 0;
-    num_snps_in_recombinations = get_list_of_snp_indices_which_fall_in_downstream_recombinations(block_coords,4,snp_locations,16, snps_in_recombinations);
-    fail_unless(num_snps_in_recombinations == 9);
+  num_snps_in_recombinations = get_list_of_snp_indices_which_fall_in_downstream_recombinations(block_coords,2,snp_locations,16, snps_in_recombinations);
+  fail_unless(num_snps_in_recombinations == 6);
 	
 	fail_unless(snps_in_recombinations[0] == 2);
 	fail_unless(snps_in_recombinations[1] == 3);
@@ -176,9 +173,6 @@ START_TEST (check_get_list_of_snp_indices_which_fall_in_downstream_recombination
 	fail_unless(snps_in_recombinations[3] == 5);
 	fail_unless(snps_in_recombinations[4] == 11);
 	fail_unless(snps_in_recombinations[5] == 12);
-	fail_unless(snps_in_recombinations[6] == 9);
-	fail_unless(snps_in_recombinations[7] == 6);
-	fail_unless(snps_in_recombinations[8] == 7);
 }
 END_TEST
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/gubbins.git



More information about the debian-med-commit mailing list