[med-svn] [subread] 02/03: Imported Upstream version 1.5.0-p3+dfsg

Alex Mestiashvili malex-guest at moszumanska.debian.org
Fri Jun 3 13:07:59 UTC 2016


This is an automated email from the git hooks/post-receive script.

malex-guest pushed a commit to branch master
in repository subread.

commit 5c4e244dd213e7f263430735647072af434781be
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date:   Thu Jun 2 14:29:20 2016 +0200

    Imported Upstream version 1.5.0-p3+dfsg
---
 doc/SubreadUsersGuide.tex |   6 +-
 src/HelperFunctions.c     |  91 +++++++++++++
 src/HelperFunctions.h     |   2 +
 src/Makefile              |   1 -
 src/SNPCalling.c          |  63 ++++++---
 src/core-indel.c          |   5 +-
 src/core.c                |  57 ++++----
 src/input-files.c         | 338 ++++++++++++++++++++++++++++++++++++++++------
 src/input-files.h         |   4 +
 src/makefile.version      |   2 +-
 src/propmapped.c          |   1 +
 src/readSummary.c         |   7 +-
 src/test-fisher.c         |  84 ++++++++++++
 13 files changed, 567 insertions(+), 94 deletions(-)

diff --git a/doc/SubreadUsersGuide.tex b/doc/SubreadUsersGuide.tex
index 48ee9a6..0bb5644 100644
--- a/doc/SubreadUsersGuide.tex
+++ b/doc/SubreadUsersGuide.tex
@@ -35,9 +35,9 @@
 \begin{center}
 {\Huge\bf Subread/Rsubread Users Guide}\\
 \vspace{1 cm}
-{\centering\large Subread v1.5.0-p2/Rsubread v1.20.5\\}
+{\centering\large Subread v1.5.0-p3/Rsubread v1.22.1\\}
 \vspace{1 cm}
-\centering 11 April 2016\\
+\centering 27 May 2016\\
 \vspace{5 cm}
 \Large Wei Shi and Yang Liao\\
 \vspace{1 cm}
@@ -764,7 +764,7 @@ Note that, when counting at the meta-feature level, reads that overlap multiple
 
 \subsection{In-built annotations}
 
-In-built gene annotations for genomes \emph{hg19}, \emph{mm10} and \emph{mm9} are included in both Bioconductor {\Rsubread} package and SourceForge {\Subread} package.
+In-built gene annotations for genomes \emph{hg38}, \emph{hg19}, \emph{mm10} and \emph{mm9} are included in both Bioconductor {\Rsubread} package and SourceForge {\Subread} package.
 These annotations were downloaded from NCBI RefSeq database and then adapted by merging overlapping exons from the same gene to form a set of disjoint exons for each gene.
 Genes with the same Entrez gene identifiers were also merged into one gene.
 
diff --git a/src/HelperFunctions.c b/src/HelperFunctions.c
index 39b9dd4..c976dc0 100644
--- a/src/HelperFunctions.c
+++ b/src/HelperFunctions.c
@@ -20,6 +20,7 @@
 #include <ctype.h>
 #include <string.h>
 #include <assert.h>
+#include <math.h>
 
 
 #ifdef MACOS
@@ -844,6 +845,8 @@ int mac_str(char * str_buff)
         }
     }
 
+    close(sock);
+
     unsigned char mac_address[6];
 
     if (success){
@@ -886,5 +889,93 @@ int mac_or_rand_str(char * str_buff){
 	return mac_str(str_buff) && rand_str(str_buff) && mathrand_str(str_buff);
 }
 
+#define PI_LONG 3.1415926535897932384626434L
+
+long double fast_fractorial(unsigned int x, long double * buff, int buflen){
+	if(x<2) return 0;
+	
+	if(buff != NULL && x < buflen && buff[x]!=0){
+		return buff[x];
+	}
+	long double ret;
+
+	if(x<50){
+		int x1;
+		ret =0.L;
+		for(x1 = 2 ; x1 <= x; x1++) ret += logl((long double)(x1));
+	}else{
+		ret = logl(x)*1.0L*x - 1.0L*x + 0.5L * logl(2.0L*PI_LONG* x) + 1.L/(12.L*x) - 1.L/(360.L* x*x*x) +  1.L/(1260.L* x*x*x*x*x) -  1.L/(1680.L*x*x*x*x*x*x*x);
+	}
+	if(buff != NULL && x < buflen) buff[x]=ret;
+	return ret;
+}
+
+
+#define BUFF_4D 36
+
+long double fast_freq( unsigned int tab[2][2] , long double * buff, int buflen){
+	int x0 = -1;
+	if(buff && buflen > BUFF_4D * BUFF_4D * BUFF_4D * BUFF_4D && tab[0][0] < BUFF_4D && tab[0][1] < BUFF_4D && tab[1][0] < BUFF_4D && tab[1][1] < BUFF_4D ){
+		x0 = buflen + tab[0][0]* BUFF_4D * BUFF_4D * BUFF_4D + tab[0][1] * BUFF_4D * BUFF_4D + tab[1][0] * BUFF_4D + tab[1][1];
+		if(buff[x0]!=0) return buff[x0];
+		
+	}
+	long double ret = fast_fractorial(tab[0][0]+tab[0][1],buff,buflen)+fast_fractorial(tab[1][0]+tab[1][1],buff,buflen) + 
+		fast_fractorial(tab[0][0]+tab[1][0],buff,buflen)+fast_fractorial(tab[0][1]+tab[1][1],buff,buflen) -
+		fast_fractorial(tab[0][0],buff,buflen) - fast_fractorial(tab[0][1],buff,buflen) - 
+		fast_fractorial(tab[1][0],buff,buflen) - fast_fractorial(tab[1][1],buff,buflen) - 
+		fast_fractorial(tab[0][0] + tab[0][1] + tab[1][0] + tab[1][1],buff,buflen);
+	if(x0>=0) buff[x0] = ret;
+	return ret;
+}
+
+
+
+double fast_fisher_test_one_side(unsigned int a, unsigned int b, unsigned int c, unsigned int d, long double * buff, int buflen){
+	unsigned int tab[2][2];
+	long double P0, P_delta, ret;
+
+	tab[0][0]=a;
+	tab[0][1]=b;
+	tab[1][0]=c;
+	tab[1][1]=d;
+
+	int x_a, y_a, x_min=-1, y_min=-1;
+	unsigned int min_a = 0xffffffff;
+	for(x_a = 0; x_a < 2; x_a++)
+		for(y_a = 0; y_a < 2; y_a++){
+			if(tab[x_a][y_a]<min_a){
+				min_a = tab[x_a][y_a];
+				x_min = x_a;
+				y_min = y_a;
+			}
+		}
+	P_delta = fast_freq(tab, buff, buflen);
+	P0 = ret = exp(P_delta);
+	//printf("P0=%LG\n", P0);
+	if(min_a>0){
+		unsigned int Qa = min_a;
+		unsigned int Qb = tab[x_min][!y_min];
+		unsigned int Qc = tab[!x_min][y_min];
+		unsigned int Qd = tab[!x_min][!y_min];
+		for(; ; ){
+			min_a --;
+			Qb++;Qc++;
+			P_delta -= logl(Qb*Qc);
+			P_delta += logl(Qa*Qd);
+			Qa--;Qd--;
+			ret += expl(P_delta);
+		//	printf("%LG %LG %LG\n", ret, 1 - (ret - P0), expl(P_delta));
+			if(min_a < 1) break;
+		}
+	}
+
+	double ret1 = ret, ret2 = 1 - (ret - P0);
 
+	if(min(ret1, ret2) < 0){
+		return 0.0;
+	}
+	return  min(ret1, ret2) ;
+
+}
 
diff --git a/src/HelperFunctions.h b/src/HelperFunctions.h
index 2f40d21..65beb1e 100644
--- a/src/HelperFunctions.h
+++ b/src/HelperFunctions.h
@@ -69,4 +69,6 @@ int strcmp_number(char * s1, char * s2);
 unsigned int reverse_cigar(unsigned int pos, char * cigar, char * new_cigar);
 unsigned int find_left_end_cigar(unsigned int right_pos, char * cigar);
 int mac_or_rand_str(char * char_14);
+
+double fast_fisher_test_one_side(unsigned int a, unsigned int b, unsigned int c, unsigned int d, long double * frac_buffer, int buffer_size);
 #endif
diff --git a/src/Makefile b/src/Makefile
index 9215779..41e8d9f 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -6,7 +6,6 @@ all:
 	@echo "  For building subread in Linux, please run ' make -f Makefile.Linux '."
 	@echo "  For building subread in Mac OS X, please run ' make -f Makefile.MacOS '."
 	@echo "  For building subread in FreeBSD, please run ' gmake -f Makefile.FreeBSD '."
-	@echo "  For building subread in Oracle Solaris or OpenSolaris, please run ' gmake -f Makefile.SunOS '."
 	@echo
 	@echo "  The default compiler is gcc; you may change it by editing the Makefiles for platforms."
 	@echo
diff --git a/src/SNPCalling.c b/src/SNPCalling.c
index fbd8b03..e34184f 100644
--- a/src/SNPCalling.c
+++ b/src/SNPCalling.c
@@ -99,7 +99,7 @@ struct SNP_Calling_Parameters{
 
 #define PRECALCULATE_FACTORIAL 2000000
 
-double * precalculated_factorial;// [PRECALCULATE_FACTORIAL];
+long double * precalculated_factorial;// [PRECALCULATE_FACTORIAL];
 
 double factorial_float_real(int a)
 {
@@ -113,7 +113,6 @@ double factorial_float_real(int a)
 
 double factorial_float(int a)
 {
-
 	if(a<PRECALCULATE_FACTORIAL && (precalculated_factorial[a]!=0.))
 		return precalculated_factorial[a]; 
 	else
@@ -154,25 +153,43 @@ double fisher_exact_test(int a, int b, int c, int d)
 
 	//printf("FET: %d %d %d %d\n", a, b, c, d);
 
-	if (a * d > b * c) {
-            a = a + b; b = a - b; a = a - b; 
-            c = c + d; d = c - d; c = c - d;
-        }
 
-        if (a > d) { a = a + d; d = a - d; a = a - d; }
-        if (b > c) { b = b + c; c = b - c; b = b - c; }
+	if(1){
+		double ret = fast_fisher_test_one_side(a,b,c,d, precalculated_factorial, PRECALCULATE_FACTORIAL);
+		//SUBREADprintf("FISHER_RES %d %d %d %d %.9f %.9f\n", a,b,c,d, ret, log(ret));
+		return ret;
+	}else{
+		int AA=a, BB=b, CC=c, DD=d;
+		if (a * d > b * c) {
+		    a = a + b; b = a - b; a = a - b; 
+		    c = c + d; d = c - d; c = c - d;
+		}
 
-        double p_sum = 0.0;
+		if (a > d) { a = a + d; d = a - d; a = a - d; }
+		if (b > c) { b = b + c; c = b - c; b = b - c; }
 
-	double p = fisherSub(a, b, c, d);
-	while (a >= 0) {
-	    p_sum += p;
-	    if (a == 0) break;
-	    --a; ++b; ++c; --d;
-	    p = fisherSub(a, b, c, d);
-	}
+		double p_sum = 0.0;
+
+		double p = fisherSub(a, b, c, d);
+		while (a >= 0) {
+		    p_sum += p;
+		    if (a == 0) break;
+		    --a; ++b; ++c; --d;
+		    p = fisherSub(a, b, c, d);
+		}
 
-	return p_sum;
+		if(1){
+			// DON'T PUT IT HERE!
+			double r1 = fast_fisher_test_one_side(AA,BB,CC,DD, NULL, PRECALCULATE_FACTORIAL);
+			if(abs(r1-p_sum) / max(r1,p_sum) >= 0.01){
+				printf("BADR: FAST = %.7f != JAVA %.7f,  %d %d %d %d\n", log(r1), log(p_sum),AA,BB,CC,DD);
+			}else{
+				printf("GOODR: FAST = %.7f ~= JAVA %.7f,  %d %d %d %d\n", log(r1), log(p_sum),AA,BB,CC,DD);
+			}
+		}
+
+		return p_sum;
+	}
 }
 
 unsigned int fisher_test_size;
@@ -508,7 +525,8 @@ void fishers_test_on_block(struct SNP_Calling_Parameters * parameters, float * s
 				}
 
 				float p_middle = fisher_exact_test(a, flanking_unmatched, c, flanking_matched);
-				//SUBREADprintf("TEST: %u  a,b,c,d=%d %d %d %d; FU=%d FM=%d; Goahead=%d; Tailleft=%d; p=%G; p-cut=%G\n", i,a,b,c,d, flanking_unmatched, flanking_matched, go_ahead, left_tail, p_middle, p_cutoff);
+				if(0 && flanking_matched > 10000 && p_middle < 1E-5)
+					SUBREADprintf("TEST: %u  a,b,c,d=%d %d %d %d; FU=%d FM=%d; Goahead=%d; Tailleft=%d; p=%G; p-cut=%G\n", i,a,b,c,d, flanking_unmatched, flanking_matched, go_ahead, left_tail, p_middle, p_cutoff);
 				if(all_result_needed ||  ( p_middle < p_cutoff && flanking_matched*20>(flanking_matched+ flanking_unmatched )*16)) 
 					snp_fisher_raw [i] = p_middle;
 				else	snp_fisher_raw [i] = -999;
@@ -1296,6 +1314,7 @@ void EXSNP_SIGINT_hook(int param)
 							unlink(del_name);
 						}
 					}
+					closedir(d);
 				}
 			}
 				
@@ -1328,8 +1347,8 @@ int SNP_calling(char * in_SAM_file, char * out_BED_file, char * in_FASTA_file, c
 
 	fisher_test_size = 0;
 
-	precalculated_factorial = (double*)malloc(sizeof(double)*PRECALCULATE_FACTORIAL);
-	for(i=0; i<PRECALCULATE_FACTORIAL; i++)
+	precalculated_factorial = (long double*)malloc(sizeof(long double)*PRECALCULATE_FACTORIAL * 2);
+	for(i=0; i<PRECALCULATE_FACTORIAL * 2; i++)
 		precalculated_factorial[i] = 0.; 
 		
 
@@ -1493,7 +1512,7 @@ void print_usage_snp(char * myname)
 	SUBREADputs("             location must have (ie. the minimum coverage). 1 by default.");
 	SUBREADputs("");
 	SUBREADputs("  -x <int>   Specify the maximum number of mapped reads a SNP-containing");
-	SUBREADputs("             location have have. 3000 by default. Any location having more than");
+	SUBREADputs("             location have have. 1000 by default. Any location having more than");
 	SUBREADputs("             the threshold number of reads will not be considered for SNP");
 	SUBREADputs("             calling. This option is useful for removing PCR artefacts.");
 	SUBREADputs("");
@@ -1567,7 +1586,7 @@ int main_snp_calling_test(int argc,char ** argv)
 	parameters.supporting_read_rate = 0.;
 	parameters.min_supporting_read_number = 1;
 	parameters.min_alternative_read_number = 1;
-	parameters.max_supporting_read_number = 3000;
+	parameters.max_supporting_read_number = 1000;
 	parameters.neighbour_filter_testlen = -1; 
 	parameters.neighbour_filter_rate = 0.000000001;
 	parameters.min_phred_score = 13;
diff --git a/src/core-indel.c b/src/core-indel.c
index 9a0fe5a..31d4bfe 100644
--- a/src/core-indel.c
+++ b/src/core-indel.c
@@ -1752,7 +1752,7 @@ int find_new_indels(global_context_t * global_context, thread_context_t * thread
 			{
 				unsigned int head_indel_left_edge = head_indel_pos + current_result->selected_position - 1;
 				//head_indel_left_edge -= max(0, head_indel_movement);
-				if(head_indel_left_edge>=0 && abs(head_indel_movement)<=global_context -> config.max_indel_length)
+				if(abs(head_indel_movement)<=global_context -> config.max_indel_length)
 				{
 					local_add_indel_event(global_context, thread_context, event_table, read_text + head_indel_pos, head_indel_left_edge, head_indel_movement, 1, 1, 0);
 					mark_gapped_read(current_result);
@@ -2126,7 +2126,7 @@ int finalise_db_graph(global_context_t * global_context, reassembly_block_contex
 		else
 		{
 			int test_next;
-			int all_reads;
+			int all_reads = 0;
 			int ignored_moves=0;
 			int max_move_reads=0;
 			unsigned long long int previous_key = current_key<<2;
@@ -3886,6 +3886,7 @@ void COREMAIN_SIGINT_hook(int param)
 						unlink(del_name);
 					}
 				}
+				closedir(d);
 			}
 		}
 			
diff --git a/src/core.c b/src/core.c
index 7423c0e..3e47262 100644
--- a/src/core.c
+++ b/src/core.c
@@ -1221,34 +1221,37 @@ int convert_read_to_tmp(global_context_t * global_context , subread_output_conte
 		{			
 			chimeric_sections = chimeric_cigar_parts(global_context, r->linear_position, is_first_section_jumped ^ current_strand, is_first_section_jumped, r->current_cigar_decompress, r->out_poses, output_context->out_cigar_buffer, r->out_strands, read_len, r->out_lens);
 
-			int xk1;
-			r->chimeric_sections = chimeric_sections;
-			strcpy(r->out_cigars[0], output_context->out_cigar_buffer[0]);
-			for(xk1=1; xk1<chimeric_sections; xk1++)
-			{
-				int chimeric_pos;
-				char * chimaric_chr;
-				strcpy(r->out_cigars[xk1], output_context->out_cigar_buffer[xk1]);
-				unsigned int vitual_head_pos = r->out_poses[xk1];
-				char strand_xor = (r->out_strands[xk1] == '-') != is_second_read;//!= (r->out_strands[0]=='-') ;
+			if(chimeric_sections > 0){
+				int xk1;
+				r->chimeric_sections = chimeric_sections;
+				strcpy(r->out_cigars[0], output_context->out_cigar_buffer[0]);
+				for(xk1=1; xk1<chimeric_sections; xk1++)
+				{
+					int chimeric_pos;
+					char * chimaric_chr;
+					strcpy(r->out_cigars[xk1], output_context->out_cigar_buffer[xk1]);
+					unsigned int vitual_head_pos = r->out_poses[xk1];
+					char strand_xor = (r->out_strands[xk1] == '-') != is_second_read;//!= (r->out_strands[0]=='-') ;
 
-				//if(( r->out_strands[xk1] == '-' ) != (r->out_strands[0]=='-' )) vitual_head_pos = move_to_read_head(vitual_head_pos, r->out_cigars[xk1]);
+					//if(( r->out_strands[xk1] == '-' ) != (r->out_strands[0]=='-' )) vitual_head_pos = move_to_read_head(vitual_head_pos, r->out_cigars[xk1]);
 
-				if(0==locate_gene_position_max(vitual_head_pos ,& global_context -> chromosome_table, & chimaric_chr, & chimeric_pos, NULL, NULL, 0+r->out_lens[xk1]))
-				{
-					int soft_clipping_movement = 0;
-					soft_clipping_movement = get_soft_clipping_length(r->out_cigars[xk1]);
-					assert(chimaric_chr);
-					sprintf(r->additional_information + strlen(r->additional_information), "\tCG:Z:%s\tCP:i:%u\tCT:Z:%c\tCC:Z:%s", r->out_cigars[xk1] , max(1,chimeric_pos + soft_clipping_movement + 1), strand_xor?'-':'+' , chimaric_chr );
+					if(0==locate_gene_position_max(vitual_head_pos ,& global_context -> chromosome_table, & chimaric_chr, & chimeric_pos, NULL, NULL, 0+r->out_lens[xk1]))
+					{
+						int soft_clipping_movement = 0;
+						soft_clipping_movement = get_soft_clipping_length(r->out_cigars[xk1]);
+						assert(chimaric_chr);
+						sprintf(r->additional_information + strlen(r->additional_information), "\tCG:Z:%s\tCP:i:%u\tCT:Z:%c\tCC:Z:%s", r->out_cigars[xk1] , max(1,chimeric_pos + soft_clipping_movement + 1), strand_xor?'-':'+' , chimaric_chr );
+					}
+					else is_r_OK = 0;
 				}
-				else is_r_OK = 0;
-			}
-			r->linear_position = r->out_poses[0];
-			r->strand = r->out_strands[0]=='-';
+				r->linear_position = r->out_poses[0];
+				r->strand = r->out_strands[0]=='-';
 
-			strcpy(r->cigar , r->out_cigars[0]);
+				strcpy(r->cigar , r->out_cigars[0]);
+			}else is_r_OK = 0;
 		}
-		r->soft_clipping_movements = get_soft_clipping_length(r->cigar);
+		if(is_r_OK)
+			r->soft_clipping_movements = get_soft_clipping_length(r->cigar);
 	}
 
 	if(is_r_OK)
@@ -4189,6 +4192,14 @@ int chimeric_cigar_parts(global_context_t * global_context, unsigned int sel_pos
 				int curr_offset, new_offset;
 				locate_gene_position_max(current_perfect_cursor, &global_context -> chromosome_table, & curr_chr, & curr_offset, NULL, NULL, 1);
 				locate_gene_position_max(jummped_location      , &global_context -> chromosome_table, &  new_chr, &  new_offset, NULL, NULL, 1);
+				if( curr_chr == NULL || new_chr == NULL ){
+					/*
+					char outpos[100];
+					absoffset_to_posstr(global_context, sel_pos + 1, outpos);
+					SUBREADprintf("Wrong CIGAR: mapped to %s, CIGAR=%s\n", outpos , in_cigar);
+					*/
+					return -1;
+				}
 				assert(curr_chr);
 				assert(new_chr);
 				is_chro_jump = (curr_chr != new_chr);
diff --git a/src/input-files.c b/src/input-files.c
index 9b704e3..7e6f8f4 100644
--- a/src/input-files.c
+++ b/src/input-files.c
@@ -2181,6 +2181,7 @@ void delete_with_prefix(char * prefix){
 						//test fix
 					}
 				}
+				closedir(d);
 			}
 		}
 			
@@ -2345,6 +2346,19 @@ void SAM_pairer_set_unsorted_notification(SAM_pairer_context_t * pairer, void (*
 	pairer -> unsorted_notification = unsorted_notification;
 }
 
+
+int SAM_pairer_warning_file_open_limit(){
+
+	struct rlimit limit_st;
+	getrlimit(RLIMIT_NOFILE, & limit_st);
+
+	if(min(limit_st.rlim_cur, limit_st.rlim_max  ) < MIN_FILE_POINTERS_ALLOWED){
+		SUBREADprintf(" ERROR: the maximum file open number (%d) is too low. Please increase this number to a number larger than 50 by using the 'ulimit -n' command. This program has to terminate now.\n\n",(int)(min(limit_st.rlim_cur, limit_st.rlim_max)));
+		return 1;
+	}
+	return 0;
+}
+
 // Tiny_Mode only write the following information:
 // Name   Flag   Chro   Pos   Mapq   Cigar   MateChro   MatePos   Tlen  N  I  NH:i:xx  HI:i:xx
 // Tiny_Mode does not work when output and input are both in BAM format
@@ -2357,6 +2371,8 @@ int SAM_pairer_create(SAM_pairer_context_t * pairer, int all_threads, int bin_bu
 	pairer -> input_fp = f_subr_open(in_file, "rb");
 	if(NULL == pairer -> input_fp) return 1;
 
+	SAM_pairer_warning_file_open_limit(pairer);
+
 	pairer -> input_is_BAM = BAM_input;
 	pairer -> tiny_mode = is_Tiny_Mode;
 	pairer -> reset_output_function = reset_output_function;
@@ -2696,10 +2712,10 @@ int SAM_pairer_get_next_read_BIN( SAM_pairer_context_t * pairer , SAM_pairer_thr
 				BAM_next_u32(bam_signature);
 				BAM_next_u32(pairer -> BAM_l_text);
 				char * header_txt = NULL;
+				int header_txt_dynamic_length = -1;
 
-				if(pairer->BAM_l_text>0) header_txt = malloc(pairer->BAM_l_text);
+				if(pairer->BAM_l_text>0) header_txt = malloc(max(1000000,pairer->BAM_l_text));
 
-				//SUBREADprintf("HEAD_TXT=%d\n", pairer -> BAM_l_text);
 				for(x1 = 0 ; x1 < pairer -> BAM_l_text; x1++){
 					BAM_next_nch;
 					header_txt [x1] = nch;
@@ -2707,13 +2723,23 @@ int SAM_pairer_get_next_read_BIN( SAM_pairer_context_t * pairer , SAM_pairer_thr
 				pairer -> output_header(pairer, thread_context -> thread_id, 1, pairer -> BAM_l_text , header_txt , pairer -> BAM_l_text );
 
 				BAM_next_u32(pairer -> BAM_n_ref);
-				//SUBREADprintf("HEAD_CHRO=%d\n", pairer -> BAM_n_ref);
 				unsigned int ref_bin_len = 0;
 				for(x1 = 0; x1 < pairer -> BAM_n_ref; x1++) {
 					unsigned int l_name, l_ref, x2;
 					char ref_name[MAX_CHROMOSOME_NAME_LEN];
 					BAM_next_u32(l_name);
 					assert(l_name < 256);
+
+					if(header_txt == NULL){
+						header_txt = malloc(3000000);
+						header_txt_dynamic_length = 3000000;
+					}
+
+					if( header_txt_dynamic_length>0 && ref_bin_len > header_txt_dynamic_length - 1000000 ){
+						header_txt_dynamic_length *= 2;
+						header_txt = realloc( header_txt,  header_txt_dynamic_length);
+					}
+
 					memcpy(header_txt + ref_bin_len, &l_name, 4);
 					ref_bin_len += 4;
 					for(x2 = 0; x2 < l_name; x2++){
@@ -2725,10 +2751,7 @@ int SAM_pairer_get_next_read_BIN( SAM_pairer_context_t * pairer , SAM_pairer_thr
 					memcpy(header_txt + ref_bin_len, &l_ref, 4);
 					ref_bin_len += 4;
 
-					if(ref_bin_len >= pairer -> BAM_l_text){
-						SUBREADprintf("%d-th ref : %s [len=%u], bin_len=%d < %d\n", x1, ref_name, l_ref, ref_bin_len,  pairer -> BAM_l_text);
-						assert(ref_bin_len < pairer -> BAM_l_text);
-					}
+					//SUBREADprintf("%d-th ref : %s [len=%u], bin_len=%d < %d\n", x1, ref_name, l_ref, ref_bin_len,  pairer -> BAM_l_text);
 				}
 
 				//exit(0);
@@ -3327,6 +3350,7 @@ void SAM_pairer_reset( SAM_pairer_context_t * pairer ) {
 	pairer -> BAM_header_parsed = 0;
 	pairer -> total_input_reads = 0;
 	pairer -> input_chunk_no = 0;
+	pairer -> merge_level_finished = 0;
 	for(x1 = 0; x1 < pairer -> total_threads ; x1 ++){
 		pairer -> threads[x1].reads_in_SBAM = 0;
 		pairer -> threads[x1].input_buff_BIN_used = 0;
@@ -3452,8 +3476,6 @@ int SAM_pairer_do_next_read( SAM_pairer_context_t * pairer , SAM_pairer_thread_t
 	int has_next_read = SAM_pairer_get_next_read_BIN(pairer, thread_context, &bin, &bin_len);
 	if(has_next_read){
 		int name_len = SAM_pairer_get_read_full_name(pairer, thread_context, bin, bin_len, read_full_name, & this_flags);
-		if(0 && FIXLENstrcmp("V0112_0155:7:1206:5677:116578", read_full_name) == 0)
-			SUBREADprintf("FNNM:%s, FLAG=%d, LASTFN=%s\n", read_full_name , this_flags, thread_context -> immediate_last_read_full_name);
 
 		if(pairer -> is_single_end_mode == 0 && ( this_flags & 1 ) == 1){ // if the reads are PE
 
@@ -3569,7 +3591,7 @@ int SAM_pairer_osr_next_name(FILE * fp , char * name, int thread_no, int all_thr
 		int rlen2 = fread(name, 1, rlen, fp);
 		if(rlen2 != rlen) return 0;
 		name[rlen]=0;
-		if( SAM_pairer_osr_hash(name)% all_threads == thread_no  )
+		if(all_threads < 0 || SAM_pairer_osr_hash(name)% all_threads == thread_no  )
 		{
 			fseek(fp, -2-rlen, SEEK_CUR);
 			return 1;
@@ -3607,42 +3629,256 @@ int SAM_pairer_is_matched_chunks(char * c1, char * c2){
 	return i2==i1;
 }
 
-void * SAM_pairer_rescure_orphants(void * params){
 
-	void ** param_ptr = (void **) params;
-	SAM_pairer_context_t * pairer = param_ptr[0];
-	int thread_no = (int)(param_ptr[1]-NULL);
-	free(params);
 
-	int orphant_fp_size = 50, orphant_fp_no=0;
-	FILE ** orphant_fps = malloc(sizeof(FILE *) * orphant_fp_size);
-	int thno, bkno, x1;
+
+
+
+void merge_level_fps(SAM_pairer_context_t * pairer, char * fname, FILE ** fps, int fps_no){
 	char * bin_tmp1 , * bin_tmp2;
+	int max_name_len = MAX_READ_NAME_LEN*2 +80, x1;
 
-	if(0 == thread_no && pairer -> display_progress)
-		SUBREADprintf("Finished scanning the input file. Processing unpaired reads.\n");
+	char tmp_fname[MAX_FILE_NAME_LENGTH];
+	sprintf(tmp_fname, "%s-MERGE-TMP.tmp", pairer->tmp_file_prefix);
+
+	char * names = malloc(  fps_no  * max_name_len );
 
 	bin_tmp1 = malloc(66000);
 	bin_tmp2 = malloc(66000);
+	FILE * out_fp = fopen(tmp_fname, "wb");
+
+
+	// initialize the "current_first_name" for each orphan file
+	
+	for(x1 = 0 ; x1 < fps_no; x1++)
+	{
+		int has = SAM_pairer_osr_next_name( fps[x1] , names + max_name_len*x1 , -1 , -1);
+		if(!has) *(names + max_name_len*x1)=0;
+	}
+
+
+	while(1){
+		int min_name_fileno = -1;
+		int min2_name_fileno = -1;
+
+		// find the min_name in all FPs
+		// and find the same min_name if there is any
+
+		for(x1 = 0 ; x1 < fps_no; x1++){
+			int has = *(names + max_name_len*x1);
+			if(has){
+				int strcv_12 = 1;
+				if(min_name_fileno >=0) strcv_12 = strcmp(names+(min_name_fileno * max_name_len), names+(x1 * max_name_len));
+				if(strcv_12 > 0){
+					min_name_fileno = x1;
+					min2_name_fileno = -1;
+				}else if( strcv_12 == 0){
+					min2_name_fileno = x1;
+				}
+			}
+
+		}
+
+
+		if(min_name_fileno >= 0){
+			SAM_pairer_osr_next_bin( fps[ min_name_fileno ] , bin_tmp1);
+
+			if(min2_name_fileno>=0){
+				SAM_pairer_osr_next_bin( fps[ min2_name_fileno ] , bin_tmp2);
+				pairer -> output_function(pairer, 0,  names + max_name_len*min_name_fileno , (char*) bin_tmp1, (char*)bin_tmp2);
+
+				if(0 == pairer -> is_unsorted_notified){
+					char * name_tmp_1 = malloc(strlen(names+(min_name_fileno * max_name_len))+5), *name_tmp_2 = malloc(strlen(names+(min_name_fileno * max_name_len))+5);
+					char * min1_chunk_info, * min2_chunk_info;
+					sprintf(name_tmp_1, "C:%s:%d", names+(min_name_fileno * max_name_len), 0);
+					sprintf(name_tmp_2, "C:%s:%d", names+(min2_name_fileno * max_name_len), 1);
+					min1_chunk_info = HashTableGet( pairer -> unsorted_notification_table , name_tmp_1);
+					min2_chunk_info = HashTableGet( pairer -> unsorted_notification_table , name_tmp_2);
+					if(min1_chunk_info == NULL || min2_chunk_info == NULL || !SAM_pairer_is_matched_chunks(min1_chunk_info, min2_chunk_info)){
+						sprintf(name_tmp_1, "B:%s:%d", names+(min_name_fileno * max_name_len), 0);
+						if( pairer -> unsorted_notification ){
+							//SUBREADprintf("FINAL STEP\n");
+							pairer -> unsorted_notification(pairer ,  HashTableGet( pairer -> unsorted_notification_table , name_tmp_1), NULL);
+						}
+						pairer -> is_unsorted_notified = 1;
+					}
+				}
+
+				int read_has = SAM_pairer_osr_next_name( fps[min2_name_fileno],  names + max_name_len*min2_name_fileno, -1, -1);
+				if(!read_has) *(names + max_name_len*min2_name_fileno)=0;
+			}else{
+				unsigned short wlen;
+				unsigned int rbinlen = 0;
+				wlen = strlen( names+(min_name_fileno * max_name_len) );
+				fwrite( &wlen, 2, 1,out_fp );
+				fwrite( names+(min_name_fileno * max_name_len), 1, wlen, out_fp );
+				memcpy( &rbinlen, bin_tmp1 , 2);
+				rbinlen += 4;
+				fwrite( bin_tmp1, 2, 1, out_fp ); 
+				fwrite( bin_tmp1, 1, rbinlen, out_fp ); 
+			}
+			int read_has = SAM_pairer_osr_next_name( fps[min_name_fileno],  names + max_name_len*min_name_fileno, -1, -1);
+			if(!read_has) *(names + max_name_len*min_name_fileno)=0;
+		} else break;
+	}
 
+	fclose(out_fp);
+	unlink(fname);
+	rename(tmp_fname, fname);
+	free(names);
+
+}
+#define PAIRER_WAIT_TICK_TIME 10000
+
+int SAM_pairer_get_merge_max_fp(SAM_pairer_context_t * pairer){
+	return pairer -> max_file_open_number;
+
+}
+
+void SAM_pairer_set_merge_max_fp(SAM_pairer_context_t * pairer, int fon){
+	pairer -> max_file_open_number = fon;
+}
+
+
+void SAM_pairer_probe_maxfp( SAM_pairer_context_t * pairer){
+	int orphant_fp_no=0;
+	int thno, bkno, x1;
+	int thread_fps [ pairer -> total_threads ];
+	char tmp_fname[MAX_FILE_NAME_LENGTH];
+
+	memset(thread_fps, 0, sizeof(int) * pairer -> total_threads);
 	for( thno = 0 ; thno < pairer -> total_threads ; thno ++ ){
 		for( bkno = 0 ; ; bkno++){
-			char tmp_fname[MAX_FILE_NAME_LENGTH];
 			sprintf(tmp_fname, "%s-TH%02d-BK%06d.tmp", pairer->tmp_file_prefix,  thno, bkno);
-
 			FILE * in_fp = fopen(tmp_fname, "rb");
 			if(NULL == in_fp) break;
-			if(orphant_fp_no >= orphant_fp_size){
-				orphant_fp_size *= 1.5;
+			thread_fps[thno] = bkno;
+			fclose(in_fp);
+			orphant_fp_no ++;
+		}
+	}
+
+	int max_open_fps = 0, has_limit = 0;
+	int orphant_fp_size = 50;
+	FILE ** orphant_fps = malloc(sizeof(FILE *) * orphant_fp_size);
+
+	for( bkno = 0 ; bkno < 5; bkno++){
+		sprintf(tmp_fname, "%s-FTEST-%d.tmp", pairer->tmp_file_prefix, bkno);
+		FILE * tfp = fopen(tmp_fname, "w");
+		if(NULL == tfp){
+			has_limit = 1;
+			break;
+		}
+		orphant_fps[max_open_fps++] = tfp;
+	}
+	//#warning ">>>>>>> COMMENT NEXT LINE <<<<<<<<"
+	for( thno = 0 ; thno < pairer -> total_threads ; thno ++ ){
+		if(has_limit) break;
+		for( bkno = 0 ; ; bkno++){
+			sprintf(tmp_fname, "%s-TH%02d-BK%06d.tmp", pairer->tmp_file_prefix,  thno, bkno);
+			FILE * in_fp = fopen(tmp_fname, "rb");
+			if(NULL == in_fp){
+				if( bkno <= thread_fps[thno] ) has_limit = 1;
+				break;
+			}
+			orphant_fps[max_open_fps++] = in_fp;
+			if(max_open_fps >= orphant_fp_size - 1){
+				orphant_fp_size *= 2;
 				orphant_fps = realloc(orphant_fps, orphant_fp_size * sizeof(FILE *));
 			}
-			orphant_fps[orphant_fp_no++]=in_fp;
 		}
 	}
 
-	int max_name_len = MAX_READ_NAME_LEN*2 +80;
+	for( bkno = 0 ;bkno < max_open_fps; bkno ++) fclose(orphant_fps[bkno]);
+
+	SAM_pairer_set_merge_max_fp(pairer, max_open_fps - 5);
+
+	//#warning ">>>>>>> COMMENT NEXT LINE <<<<<<<<"
+	//SUBREADprintf("Needed FPS = %d, Ulimit FPS = %d, Has_Limit = %d  \n", orphant_fp_no, max_open_fps, has_limit);
+
+	if( SAM_pairer_get_merge_max_fp(pairer) < orphant_fp_no * pairer -> total_threads){
+		int processed_orphant = 0;
+		int current_opened_fp_no = 0 ;
+		FILE * level_merge_fps [ SAM_pairer_get_merge_max_fp(pairer) ];
+		for( thno = 0 ; thno < pairer -> total_threads ; thno ++ ){
+			for( bkno = 0 ; ; bkno++){
+				char tmp_fname[MAX_FILE_NAME_LENGTH];
+				sprintf(tmp_fname, "%s-TH%02d-BK%06d.tmp", pairer->tmp_file_prefix,  thno, bkno);
+
+				FILE * in_fp = fopen(tmp_fname, "rb");
+				if(NULL == in_fp) break;
+
+	//			#warning ">>>> COMMENT DEBUG OUTPUT <<<<"
+	//			SUBREADprintf("Adding temp file:%s\n", tmp_fname);
+				level_merge_fps[current_opened_fp_no ++] = in_fp;
+				processed_orphant ++;
+				if(current_opened_fp_no >= SAM_pairer_get_merge_max_fp(pairer) || processed_orphant == orphant_fp_no){
+					sprintf(tmp_fname, "%s-LEVELMERGE.tmp", pairer->tmp_file_prefix);
+
+	//				#warning ">>>> COMMENT DEBUG OUTPUT <<<<"
+	//				SUBREADprintf("Merging temp files\n");
+					merge_level_fps(pairer , tmp_fname, level_merge_fps, current_opened_fp_no);
+					for(x1 = 0; x1 < current_opened_fp_no; x1++) fclose(level_merge_fps[x1]);
+
+					if(processed_orphant < orphant_fp_no){
+						level_merge_fps[0] = fopen(tmp_fname, "rb");
+						current_opened_fp_no = 1;
+					}
+				}
+			}
+		}
+		pairer -> merge_level_finished = 1;
+	}
+	free(orphant_fps);
+
+}
+
+void * SAM_pairer_rescure_orphants_max_FP(void * params){
+	void ** param_ptr = (void **) params;
+	SAM_pairer_context_t * pairer = param_ptr[0];
+	int thread_no = (int)(param_ptr[1]-NULL);
+	free(params);
+
+	unsigned long long died=0;
+	int orphant_fp_no=0;
+	int thno, bkno, x1;
+	char tmp_fname[MAX_FILE_NAME_LENGTH];
+
+	int max_name_len = MAX_READ_NAME_LEN*2 +80, orphant_fp_size = 50;
+	FILE ** orphant_fps = malloc(sizeof(FILE *) * orphant_fp_size);
+
+	if(0 == thread_no && pairer -> display_progress)
+		SUBREADprintf("Finished scanning the input file. Processing unpaired reads.\n");
+
+	//SUBREADprintf("merged = %d\n", pairer -> merge_level_finished);
+	if(pairer -> merge_level_finished){
+		sprintf(tmp_fname, "%s-LEVELMERGE.tmp", pairer->tmp_file_prefix);
+		FILE * in_fp = fopen(tmp_fname, "rb");
+		orphant_fps[0] = in_fp;
+		orphant_fp_no=1;
+	}else{
+		orphant_fp_no = 0;
+		for( thno = 0 ; thno < pairer -> total_threads ; thno ++ ){
+			for( bkno = 0 ; ; bkno++){
+				sprintf(tmp_fname, "%s-TH%02d-BK%06d.tmp", pairer->tmp_file_prefix,  thno, bkno);
+
+				FILE * in_fp = fopen(tmp_fname, "rb");
+				if(NULL == in_fp) break;
+				if(orphant_fp_no >= orphant_fp_size){
+					orphant_fp_size *= 1.5;
+					orphant_fps = realloc(orphant_fps, orphant_fp_size * sizeof(FILE *));
+				}
+				orphant_fps[orphant_fp_no++]=in_fp;
+			}
+		}
+	}
+
 	char * names = malloc( orphant_fp_no * max_name_len );
 	memset(names, 0, orphant_fp_no * max_name_len );
+	char * bin_tmp1 , * bin_tmp2;
+	bin_tmp1 = malloc(66000);
+	bin_tmp2 = malloc(66000);
+
 	
 	for(x1 = 0 ; x1 < orphant_fp_no; x1++)
 	{
@@ -3650,7 +3886,6 @@ void * SAM_pairer_rescure_orphants(void * params){
 		if(!has) *(names + max_name_len*x1)=0;
 	}
 
-	unsigned long long rescured=0, died=0;
 
 	while(1){
 		int min_name_fileno = -1;
@@ -3685,8 +3920,10 @@ void * SAM_pairer_rescure_orphants(void * params){
 					sprintf(name_tmp_2, "C:%s:%d", names+(min2_name_fileno * max_name_len), 1);
 					min1_chunk_info = HashTableGet( pairer -> unsorted_notification_table , name_tmp_1);
 					min2_chunk_info = HashTableGet( pairer -> unsorted_notification_table , name_tmp_2);
+					//#warning ">>>>>>> COMMENT NEXT LINE <<<<<<<<"
 					//SUBREADprintf("RESCURE MATCHER:  %s , %s ==  %s , %s, %s\n", name_tmp_1, name_tmp_2, min1_chunk_info, min2_chunk_info,
 					//	SAM_pairer_is_matched_chunks(min1_chunk_info, min2_chunk_info)?"MATCH":"XXXXX");
+
 					if(min1_chunk_info == NULL || min2_chunk_info == NULL || !SAM_pairer_is_matched_chunks(min1_chunk_info, min2_chunk_info)){
 						sprintf(name_tmp_1, "B:%s:%d", names+(min_name_fileno * max_name_len), 0);
 						if( pairer -> unsorted_notification ){
@@ -3699,19 +3936,26 @@ void * SAM_pairer_rescure_orphants(void * params){
 
 				int read_has = SAM_pairer_osr_next_name( orphant_fps[min2_name_fileno],  names + max_name_len*min2_name_fileno, thread_no,  pairer-> total_threads);
 				if(!read_has) *(names + max_name_len*min2_name_fileno)=0;
-				rescured++;
 			}else{
+				//#warning ">>>>>>> COMMENT NEXT LINE <<<<<<<<"
 				//SUBREADprintf("FINAL_ORPHAN:%s\n" , names + max_name_len*min_name_fileno);
 				pairer -> output_function(pairer, thread_no,  names + max_name_len*min_name_fileno, (char*) bin_tmp1, NULL);
 				died++;
 			}
 
 			int read_has = SAM_pairer_osr_next_name( orphant_fps[min_name_fileno],  names + max_name_len*min_name_fileno, thread_no, pairer-> total_threads);
+			//#warning ">>>>>>> COMMENT NEXT BLOCK <<<<<<<<"
+			if(0){
+					if(!read_has) SUBREADprintf("FP %d FINISHED\n", min_name_fileno);
+				}
 			if(!read_has) *(names + max_name_len*min_name_fileno)=0;
 		} else break;
 	}
 	free(names);
 
+	//#warning ">>>>>>> COMMENT NEXT LINE <<<<<<<<"
+	//SUBREADprintf("finished_fps= %d\n", orphant_fp_no);
+
 	for(x1 = 0 ; x1 < orphant_fp_no; x1++)
 	{
 		fclose ( orphant_fps[x1] );
@@ -3719,10 +3963,10 @@ void * SAM_pairer_rescure_orphants(void * params){
 	free( bin_tmp1 );
 	free( bin_tmp2 );
 	pairer -> total_orphan_reads += died;
-	//SUBREADprintf("RESCURE THREAD %d   Rescured %llu, Died %llu\n", thread_no, rescured, died);
 	return NULL;
 }
 
+
 void SAM_pairer_update_orphant_table(SAM_pairer_context_t * pairer , SAM_pairer_thread_t * thread_context){
 	unsigned int x2 = 0;
 	unsigned char ** name_list, ** bin_list;
@@ -3815,7 +4059,17 @@ int is_read_bin(char * bin, int bin_len, int max_refID){
 		int cigar_op = cigar_v & 0xf;
 		int cigar_value = cigar_v & 0xfffffff;
 		if(cigar_op > 8) return -12;
-		if((cigar_op == 0 || cigar_op == 1 || cigar_op > 4) && (cigar_value < 1 || cigar_value > MAX_BIN_RECORD_LENGTH)) return -13;
+
+		if((cigar_op == 0 || cigar_op == 1 || cigar_op > 6) && (cigar_value < 1 || cigar_value > MAX_BIN_RECORD_LENGTH)){
+
+			//#warning ">>>>>> COMMENT NEXT LINE IN RELEASE <<<<<<"
+			if(0){
+				char * rname = bin + 36;
+				SUBREADprintf("OP=%d, VAL=%d [%s]\n", cigar_op, cigar_value, rname);
+			}
+
+			return -13;
+		}
 	}
 
 	int ext_cursor = 36 + name_len + 4*cigar_opts + l_seq + (l_seq+1)/2;
@@ -3857,7 +4111,6 @@ int SAM_pairer_find_start(SAM_pairer_context_t * pairer , SAM_pairer_thread_t *
 	}
 }
 
-#define PAIRER_WAIT_TICK_TIME 10000
 
 void * SAM_pairer_thread_run( void * params ){
 	void ** param_ptr = (void **) params;
@@ -3931,6 +4184,9 @@ int SAM_pairer_run_once( SAM_pairer_context_t * pairer){
 	}
 
 	if(0 == pairer -> is_bad_format){
+
+		SAM_pairer_probe_maxfp( pairer );
+
 		for(x1 = 0; x1 < pairer -> total_threads ; x1++){
 			// this 16-byte memory block is freed in the thread worker.
 
@@ -3938,7 +4194,7 @@ int SAM_pairer_run_once( SAM_pairer_context_t * pairer){
 
 			init_params[0] = pairer;
 			init_params[1] = (void *)(NULL+x1);
-			pthread_create(&(pairer -> threads[x1].thread_stab), NULL, SAM_pairer_rescure_orphants, init_params);
+			pthread_create(&(pairer -> threads[x1].thread_stab), NULL, SAM_pairer_rescure_orphants_max_FP, init_params);
 		}
 
 		for(x1 = 0; x1 < pairer -> total_threads ; x1++){
@@ -4189,7 +4445,11 @@ void SAM_pairer_fix_format(SAM_pairer_context_t * pairer){
 			block_size += (nch << (8 * x1));
 		}
 
-		//SUBREADprintf("Bsize=%d\n", block_size);
+		//#warning ">>>>>> COMMENT NEXT BLOCK <<<<<<"
+		if(0){	
+			if(block_size > 65000)
+				SUBREADprintf("Bsize=%d\n", block_size);
+		}
 		if(block_size > 60000 && !pairer -> tiny_mode){
 			pairer -> is_bad_format = 1;
 			SUBREADprintf("ERROR: the read record length (%d) is longer than the limit. The program has to terminate. \n", block_size);
@@ -4454,7 +4714,7 @@ void SAM_nosort_run_once(SAM_pairer_context_t * pairer){
 		unsigned int bam_signature;
 		NOSORT_BAM_next_u32(bam_signature);
 		NOSORT_BAM_next_u32(pairer -> BAM_l_text);
-		char * header_txt = malloc(pairer->BAM_l_text);
+		char * header_txt = malloc(max(1000000,pairer->BAM_l_text));
 
 		for(x1 = 0 ; x1 < pairer -> BAM_l_text; x1++){
 			NOSORT_BAM_next_nch;
@@ -4669,12 +4929,14 @@ int SAM_pairer_run( SAM_pairer_context_t * pairer){
 
 	if(pairer -> force_do_not_sort){
 		SAM_nosort_run_once(pairer);
-	}else for(corrected_run = 0; corrected_run < 2 ; corrected_run ++){
+
+	}else for(corrected_run = 0; corrected_run < 2  ; corrected_run ++){
 		SAM_pairer_run_once(pairer);
 		if(pairer -> is_bad_format && pairer->input_is_BAM){
-			assert(0 == corrected_run);
-			if(pairer -> display_progress)
-				SUBREADprintf("Retrying with the corrected format...\n");
+			//#warning ">>>>>> REMOVE '+ 1' FROM NEXT LINE IN RELEASE <<<<<<"
+			assert(1 != corrected_run);
+			//#warning ">>>>>> COMMENT NEXT LINE IN RELEASE <<<<<<"
+			//SUBREADprintf("Retrying with the corrected format...\n");
 			delete_with_prefix(pairer -> tmp_file_prefix);
 			SAM_pairer_fix_format(pairer);
 			if(pairer -> is_bad_format)
diff --git a/src/input-files.h b/src/input-files.h
index f98ff3b..d7a96c3 100644
--- a/src/input-files.h
+++ b/src/input-files.h
@@ -36,6 +36,7 @@
 #define GENE_INPUT_SAM_PAIR_2   95
 
 
+#define MIN_FILE_POINTERS_ALLOWED 50
 
 #define FILE_TYPE_SAM     50
 #define FILE_TYPE_BAM     500
@@ -120,6 +121,8 @@ typedef struct {
 	int is_single_end_mode;
 	int force_do_not_sort;
 	int is_finished;
+	int merge_level_finished;
+	int max_file_open_number;
 	subread_lock_t input_fp_lock;
 	subread_lock_t output_header_lock;
 	subread_lock_t unsorted_notification_lock;
@@ -303,4 +306,5 @@ int SAM_pairer_multi_thread_header (void * pairer_vp, int thread_no, int is_text
 int SAM_pairer_writer_create( SAM_pairer_writer_main_t * bam_main , int all_threads, int has_dummy , int BAM_output, int BAM_compression_level, char * out_file);
 void SAM_pairer_writer_destroy( SAM_pairer_writer_main_t * bam_main ) ;
 int SAM_pairer_iterate_int_tags(unsigned char * bin, int bin_len, char * tag_name, int * saved_value);
+int SAM_pairer_warning_file_open_limit();
 #endif
diff --git a/src/makefile.version b/src/makefile.version
index 177d75d..3266789 100644
--- a/src/makefile.version
+++ b/src/makefile.version
@@ -1,4 +1,4 @@
-SUBREAD_VERSION_BASE=1.5.0-p2
+SUBREAD_VERSION_BASE=1.5.0-p3
 SUBREAD_VERSION_DATE=$(SUBREAD_VERSION_BASE)-$(shell date +"%d%b%Y")
 SUBREAD_VERSION="$(SUBREAD_VERSION_DATE)"
 SUBREAD_VERSION="$(SUBREAD_VERSION_BASE)"
diff --git a/src/propmapped.c b/src/propmapped.c
index f5d16c6..064dae7 100644
--- a/src/propmapped.c
+++ b/src/propmapped.c
@@ -109,6 +109,7 @@ void PROPMAPPED_SIGINT_hook(int param)
 						unlink(del_name);
 					}
 				}
+				closedir(d);
 			}
 		}
 			
diff --git a/src/readSummary.c b/src/readSummary.c
index a1bbc6d..8457f9c 100644
--- a/src/readSummary.c
+++ b/src/readSummary.c
@@ -936,14 +936,14 @@ int load_feature_info(fc_thread_global_context_t *global_context, const char * a
 
 			ret_features[xk1].chro_name_pos_delta = chro_name_pos - ret_features[xk1].feature_name_pos;
 			ret_features[xk1].start = atoi( start_ptr );// start 
-			if(ret_features[xk1].start<0 || ret_features[xk1].start>0x7fffffff)
+			if(ret_features[xk1].start>0x7fffffff)
 			{
 				ret_features[xk1].start = 0;
 				print_in_box(80,0,0,"WARNING the %d-th line has a negative chro coordinate.", lineno);
 			}
 
 			ret_features[xk1].end = atoi( end_ptr );//end 
-			if(ret_features[xk1].end<0 || ret_features[xk1].end>0x7fffffff)
+			if(ret_features[xk1].end>0x7fffffff)
 			{
 				ret_features[xk1].end = 0;
 				print_in_box(80,0,0,"WARNING the %d-th line has a negative chro coordinate.", lineno);
@@ -4145,8 +4145,7 @@ int readSummary(int argc,char *argv[]){
 	else	isRestrictlyNoOvelrapping = 0;
 	
 
-	
-
+	if(SAM_pairer_warning_file_open_limit()) return -1;
 
 	fc_thread_global_context_t global_context;
 
diff --git a/src/test-fisher.c b/src/test-fisher.c
new file mode 100644
index 0000000..610b7fe
--- /dev/null
+++ b/src/test-fisher.c
@@ -0,0 +1,84 @@
+#include <stdlib.h> 
+#include <math.h> 
+#include <string.h> 
+#include "subread.h" 
+#include "HelperFunctions.h"
+
+
+
+double factorial_float(int a)
+{
+
+        double ret = 0;
+        while(a)
+                ret += log(a--);
+        return ret;
+}
+
+
+
+double fisherSub(int a, int b, int c, int d)
+{
+        double ret = factorial_float(a+b) + factorial_float(c+d) + factorial_float(a+c) + factorial_float(b+d) ;
+        ret -= factorial_float(a) + factorial_float(b) + factorial_float(c) + factorial_float(d) + factorial_float(a+b+c+d);
+        return pow(2.71828183, ret);
+}
+
+
+
+
+/**
+ * See HELP string or run with no arguments for usage.
+ * <p>
+ * The code used to calculate a Fisher p-value comes originally from a
+ * <a href="http://infofarm.affrc.go.jp/~kadasowa/fishertest.htm">JavaScript program</a>
+ * by T. Kadosawa (kadosawa at niaes.affrc.go.jp).
+ * Retrieved from http://www.users.zetnet.co.uk/hopwood/tools/StatTests.java on 3/Jul/2012
+ *
+ * @author David Hopwood
+ * @date   2000/04/23
+ */
+
+double fisher_exact_test(int a, int b, int c, int d)
+{
+
+	if (a * d > b * c) {
+	    a = a + b; b = a - b; a = a - b;
+	    c = c + d; d = c - d; c = c - d;
+	}
+
+	if (a > d) { a = a + d; d = a - d; a = a - d; }
+	if (b > c) { b = b + c; c = b - c; b = b - c; }
+
+	double p_sum = 0.0;
+
+	double p = fisherSub(a, b, c, d);
+	while (a >= 0) {
+	    p_sum += p;
+	    if (a == 0) break;
+	    --a; ++b; ++c; --d;
+	    p = fisherSub(a, b, c, d);
+	}
+
+	return p_sum;
+}
+
+
+long double fastfact(int x){
+	return logl(x)*x - x + 0.5 * logl(2*M_PI* x) + 1/(12.*x) - 1./(360.* x*x*x) +  1./(1260.* x*x*x*x*x) -  1./(1680.*x*x*x*x*x*x*x);// + (x>60?0:(1./(1188.*x*x*x*x*x*x*x*x*x ) ));
+}
+
+main(){
+	unsigned int 	a = 10  , c = 11,
+			b = 11 ,  d = 5000;
+
+	double fisher, fisher_old;
+	fisher = fast_fisher_test_one_side(a,b,c,d, NULL, 0);
+	fisher_old = fisher_exact_test(a,b,c,d);
+	printf("Log fisher = %.7f ; Old fisher = %.7f\n", log(fisher),log(fisher_old));
+
+	long double x1 =  1E-19L + 1E-20L;
+	long double x2 =  1L - expl(logl(0.5L) + logl(2.0L));
+	printf("New Vals: x1=%LG, x2=%LG\n", x1,x2);
+}
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/subread.git



More information about the debian-med-commit mailing list