[med-svn] [subread] 01/03: Imported Upstream version 1.4.6-p3+dfsg

Alex Mestiashvili malex-guest at moszumanska.debian.org
Tue May 19 12:01:37 UTC 2015


This is an automated email from the git hooks/post-receive script.

malex-guest pushed a commit to branch master
in repository subread.

commit 00a94adda6772649fa1cc3a394b71c497199c714
Author: Alexandre Mestiashvili <alex at biotec.tu-dresden.de>
Date:   Tue May 19 13:48:01 2015 +0200

    Imported Upstream version 1.4.6-p3+dfsg
---
 src/core.c                      |   9 +-
 src/makefile.version            |   2 +-
 src/readSummary.c               | 484 +++++++++++++++++++++++++++++++---------
 src/subread.h                   |   3 +
 test/subread-align/test-tmp.log |  34 +--
 5 files changed, 404 insertions(+), 128 deletions(-)

diff --git a/src/core.c b/src/core.c
index 69a00a2..bb10687 100644
--- a/src/core.c
+++ b/src/core.c
@@ -82,12 +82,16 @@ void warning_file_limit()
 	}
 }
 
-void print_in_box(int line_width, int is_boundary, int is_center, char * pattern,...)
+
+void print_in_box(int line_width, int is_boundary, int options, char * pattern,...)
 {
+	int put_color_for_colon, is_center;
 	va_list args;
 	va_start(args , pattern);
 	char is_R_linebreak=0, * content, *out_line_buff;
 
+	put_color_for_colon = (options & PRINT_BOX_NOCOLOR_FOR_COLON)?0:1;
+	is_center = (options & PRINT_BOX_CENTER)?1:0;
 	content= malloc(1000);
 	out_line_buff= malloc(1000);
 	out_line_buff[0]=0;
@@ -224,7 +228,7 @@ void print_in_box(int line_width, int is_boundary, int is_center, char * pattern
 				break;
 			}
 		}
-		if(col1w>0 && col1w < content_len-1)
+		if(col1w>0 && col1w < content_len-1 && put_color_for_colon)
 		{
 			content[col1w+1]=0;
 			strcat(out_line_buff,content);
@@ -255,6 +259,7 @@ void print_in_box(int line_width, int is_boundary, int is_center, char * pattern
 
 
 
+
 int show_summary(global_context_t * global_context)
 {
 
diff --git a/src/makefile.version b/src/makefile.version
index 80f7d3f..296e933 100644
--- a/src/makefile.version
+++ b/src/makefile.version
@@ -1,3 +1,3 @@
-SUBREAD_VERSION="1.4.6-p2"
+SUBREAD_VERSION="1.4.6-p3"
 STATIC_MAKE=
 #STATIC_MAKE= -static
diff --git a/src/readSummary.c b/src/readSummary.c
index 3a21da5..72812ed 100644
--- a/src/readSummary.c
+++ b/src/readSummary.c
@@ -89,6 +89,8 @@ typedef struct
 	unsigned long long unassigned_duplicate;
 } fc_read_counters;
 
+typedef unsigned long long read_count_type_t;
+
 typedef struct
 {
 	unsigned short thread_id;
@@ -98,7 +100,8 @@ typedef struct
 	unsigned long long int all_reads;
 	//unsigned short current_read_length1;
 	//unsigned short current_read_length2;
-	unsigned int * count_table;
+	read_count_type_t * count_table;
+	read_count_type_t unpaired_fragment_no;
 	unsigned int chunk_read_ptr;
 	pthread_t thread_object;
 
@@ -147,7 +150,8 @@ typedef struct
 typedef struct
 {
 	int is_gene_level;
-	int is_paired_end_data;
+	int is_paired_end_input_file;
+	int is_paired_end_mode_assign;
 	int is_multi_overlap_allowed;
 	int is_strand_checked;
 	int is_both_end_required;
@@ -157,12 +161,15 @@ typedef struct
 	int is_input_file_resort_needed;
 	int is_SAM_file;
 	int is_read_details_out;
+	int is_SEPEmix_warning_shown;
 	int is_unpaired_warning_shown;
 	int is_stake_warning_shown;
 	int is_split_alignments_only;
 	int is_duplicate_ignored;
+	int do_not_sort;
 	int reduce_5_3_ends_to_one;
 	int isCVersion;
+	int use_fraction_multi_mapping;
 
 	int min_mapping_quality_score;
 	int min_paired_end_distance;
@@ -325,13 +332,20 @@ void print_FC_configuration(fc_thread_global_context_t * global_context, char *
 	print_in_box(80,0,0,"");
 	print_in_box(80,0,0,"                Threads : %d", global_context->thread_number);
 	print_in_box(80,0,0,"                  Level : %s level", global_context->is_gene_level?"meta-feature":"feature");
-	print_in_box(80,0,0,"             Paired-end : %s", global_context->is_paired_end_data?"yes":"no");
+	print_in_box(80,0,0,"             Paired-end : %s", global_context->is_paired_end_mode_assign?"yes":"no");
+	if(global_context -> do_not_sort && global_context->is_paired_end_mode_assign)
+	{
+		print_in_box(80,0,0,"       Sorting PE Reads : never");
+		print_in_box(80,0,0,"");
+	}
+
 	print_in_box(80,0,0,"        Strand specific : %s", global_context->is_strand_checked?(global_context->is_strand_checked==1?"yes":"inversed"):"no");
 	char * multi_mapping_allow_mode = "not counted";
 	if(global_context->is_multi_mapping_allowed == ALLOW_PRIMARY_MAPPING)
 		multi_mapping_allow_mode = "primary only";
 	else if(global_context->is_multi_mapping_allowed == ALLOW_ALL_MULTI_MAPPING)
-		multi_mapping_allow_mode = "counted";
+		multi_mapping_allow_mode = global_context -> use_fraction_multi_mapping?"counted (as fractions)": "counted (as integer)";
+
 	print_in_box(80,0,0,"     Multimapping reads : %s", multi_mapping_allow_mode);
 	print_in_box(80,0,0,"Multi-overlapping reads : %s", global_context->is_multi_overlap_allowed?"counted":"not counted");
 	if(global_context -> is_split_alignments_only)
@@ -345,7 +359,7 @@ void print_FC_configuration(fc_thread_global_context_t * global_context, char *
 	if(global_context -> is_duplicate_ignored)
 		print_in_box(80,0,0,"       Duplicated Reads : ignored");
 
-	if(global_context->is_paired_end_data)
+	if(global_context->is_paired_end_mode_assign)
 	{
 		print_in_box(80,0,0,"");
 		print_in_box(80,0,0,"         Chimeric reads : %s", global_context->is_chimertc_disallowed?"not counted":"counted");
@@ -378,7 +392,7 @@ void print_FC_results(fc_thread_global_context_t * global_context)
 	if(0){
 		print_in_box(80,1,1,"Summary");
 		print_in_box(80,0,0,"");
-		if(global_context->is_paired_end_data)
+		if(global_context->is_paired_end_mode_assign)
 			print_in_box(80,0,0,"        All fragments : %llu", global_context -> all_reads);
 		else
 			print_in_box(80,0,0,"            All reads : %llu", global_context -> all_reads);
@@ -388,7 +402,7 @@ void print_FC_results(fc_thread_global_context_t * global_context)
 		else
 			print_in_box(80,0,0,"             Features : %u", global_context -> exontable_exons);
 
-		if(global_context->is_paired_end_data)
+		if(global_context->is_paired_end_mode_assign)
 			print_in_box(80,0,0,"   Assigned fragments : %llu", global_context -> read_counters.assigned_reads);
 		else
 			print_in_box(80,0,0,"       Assigned reads : %llu", global_context -> read_counters.assigned_reads);
@@ -983,6 +997,98 @@ int strcmp_slash(char * s1, char * s2)
 	return nch != *s2;
 }
 
+#define NH_FRACTION_INT 65536
+
+unsigned int calc_fixed_fraction(int nh){
+	if(nh==1) return NH_FRACTION_INT;
+	else if(nh == 2) return NH_FRACTION_INT>>1;
+	else return NH_FRACTION_INT / nh; 
+}
+
+
+int calc_float_fraction(read_count_type_t score, read_count_type_t * integer_count, double * float_count){
+	if(score % NH_FRACTION_INT == 0){
+		(*integer_count) = score / NH_FRACTION_INT;
+		return 0;
+	}else{
+		(*float_count) = score * 1./NH_FRACTION_INT;
+		return 1;
+	}
+}
+
+
+void print_read_wrapping(char * rl, int is_second){
+	int refill_spaces = 3;
+
+	int read_length = 0, x1 = 0, spaces=0;
+
+	for(x1 = 0; x1 < 3100; x1++){
+		if(rl[x1]==0 && rl[x1+1]==0)break;
+		if(rl[x1]=='0' || rl[x1]=='\t') spaces++;
+		read_length ++;
+	}
+
+	char *out_buf1 = malloc(read_length + spaces * refill_spaces + 1), out_buf2[100];
+	int ox=0;
+
+	for(x1 = 0; x1 < 3000; x1++){
+		if(rl[x1]=='\n' || (rl[x1]==0 && rl[x1+1]==0)){
+			out_buf1[ox]=0;
+			break;
+		} else if((rl[x1]==0 && rl[x1+1]!=0) || rl[x1] == '\t'){
+			int x2;
+			for(x2 = 0; x2 < refill_spaces ; x2++){
+				out_buf1[ox]=' ';
+				ox++;
+			}
+		} else {
+			out_buf1[ox]=rl[x1];
+			ox++;
+		}
+	}
+	out_buf1[ox] = 0;
+
+	x1=0;
+
+	while(1){
+		int x2;
+		for(x2 = 0; x2 < 67 ; x2 ++){
+			char nch = out_buf1[x1];
+			if(nch == 0) break;
+			out_buf2[x2] = nch;
+			x1++;
+		}
+		out_buf2[x2] = 0;
+
+		print_in_box(80,0,PRINT_BOX_NOCOLOR_FOR_COLON,"      %s", out_buf2);
+		if(out_buf1[x1] == 0)break;
+	}
+
+	free(out_buf1);
+
+}
+
+void report_unpair_warning(fc_thread_global_context_t * global_context, fc_thread_thread_context_t * thread_context, int * this_noproperly_paired_added){
+	//printf("WARN:%d [%d]\n", global_context->is_unpaired_warning_shown, thread_context -> thread_id);
+	if(!global_context->is_unpaired_warning_shown)
+	{
+		global_context->is_unpaired_warning_shown=1;
+		print_in_box(80,0,0,"   Found reads that are not properly paired.");
+		print_in_box(80,0,0,"   (missing mate or the mate is not the next read)");
+
+		if(global_context -> do_not_sort){
+			print_in_box(85,0,0,"   %c[31mHowever, the reads will not be re-ordered.", 27);
+		}else{
+			global_context->redo = 1;
+		}
+		print_in_box(80,0,0,"   Below are the two reads that are not properly paired:");
+		print_read_wrapping(thread_context -> line_buffer1,0);
+		print_read_wrapping(thread_context -> line_buffer2,1);
+
+	}
+	if(0==(*this_noproperly_paired_added))thread_context -> unpaired_fragment_no++;
+	(*this_noproperly_paired_added) = 1;
+}
 
 void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_thread_context_t * thread_context)
 {
@@ -995,15 +1101,17 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 	short * hits_total_length1 = thread_context -> hits_total_length1 ,  * hits_total_length2 = thread_context -> hits_total_length2;
 
 	int is_second_read;
+	int maximum_NH_value = 1;
 	int skipped_for_exonic = 0;
 	int first_read_quality_score = 0;
+	int this_noproperly_paired_added = 0;
 
 	thread_context->all_reads++;
 	//if(thread_context->all_reads>1000000) printf("TA=%llu\n%s\n",thread_context->all_reads, thread_context -> line_buffer1);
 
 	for(is_second_read = 0 ; is_second_read < 2; is_second_read++)
 	{
-		if(is_second_read && !global_context -> is_paired_end_data) break;
+		if(is_second_read && !global_context -> is_paired_end_mode_assign) break;
 
 		char * line = is_second_read? thread_context -> line_buffer2:thread_context -> line_buffer1;
 	
@@ -1028,17 +1136,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 				}
 				//printf("R1=%s; R2=%s\n",read_name,read_name1 );
 				if(strcmp_slash(read_name,read_name1)!=0)
-				{
-					//printf("WARN:%d [%d]\n", global_context->is_unpaired_warning_shown, thread_context -> thread_id);
-					if(!global_context->is_unpaired_warning_shown)
-					{
-				//	printf("RV:%s,%s\n", read_name, read_name1);
-						global_context->is_unpaired_warning_shown=1;
-						global_context->redo = 1;
-						print_in_box(80,0,0,"   Found reads that are not properly paired.");
-						print_in_box(80,0,0,"   (missing mate or the mate is not the next read)");
-					}
-				}
+					report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
 			}
 		}
 		else
@@ -1052,18 +1150,31 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 		if(is_second_read == 0)
 		{
 			//skip the read if unmapped (its mate will be skipped as well if paired-end)
-			if( ((!global_context -> is_paired_end_data) &&  (alignment_masks & SAM_FLAG_UNMAPPED) ) ||
-			    ((alignment_masks & SAM_FLAG_UNMAPPED)   &&  (alignment_masks & SAM_FLAG_MATE_UNMATCHED) && global_context -> is_paired_end_data) ||
-			    (((alignment_masks & SAM_FLAG_UNMAPPED) || (alignment_masks & SAM_FLAG_MATE_UNMATCHED)) && global_context -> is_paired_end_data && global_context -> is_both_end_required)
+			if( ((!global_context -> is_paired_end_mode_assign) &&  (alignment_masks & SAM_FLAG_UNMAPPED) ) ||
+			    ((alignment_masks & SAM_FLAG_UNMAPPED)   &&  (alignment_masks & SAM_FLAG_MATE_UNMATCHED) && global_context -> is_paired_end_mode_assign) ||
+			    (((alignment_masks & SAM_FLAG_UNMAPPED) || (alignment_masks & SAM_FLAG_MATE_UNMATCHED)) && global_context -> is_paired_end_mode_assign && global_context -> is_both_end_required)
 			  ){
 				thread_context->read_counters.unassigned_unmapped ++;
 
 				if(global_context -> SAM_output_fp)
 					fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_Unmapped\t*\t*\n", read_name);
+
+				if(global_context -> is_paired_end_mode_assign){
+					char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
+					if(strcmp_slash(read_name,read_name2)!=0)
+						report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
+				}
 				return;	// do nothing if a read is unmapped, or the first read in a pair of reads is unmapped.
 			}
 		}
 
+		if(global_context -> is_paired_end_mode_assign && (!global_context ->is_SEPEmix_warning_shown)){
+			if(((!global_context -> is_paired_end_input_file)  && ( alignment_masks & SAM_FLAG_PAIRED_TASK )) || ((global_context -> is_paired_end_input_file)  && 0 == ( alignment_masks & SAM_FLAG_PAIRED_TASK ))){
+				print_in_box(85,0,0,"   %c[31mBoth single-end and paired-end reads were found.", 27);
+				global_context ->is_SEPEmix_warning_shown = 1;
+			}
+		}
+
 
 
 		read_chr = strtok_r(NULL,"\t", &tmp_tok_ptr);
@@ -1085,17 +1196,23 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 			int mapping_qual =atoi(mapping_qual_str);
 
 			//printf("SECOND=%d; FIRST=%d; THIS=%d; Q=%d\n", is_second_read, first_read_quality_score, mapping_qual, );
-			if(( mapping_qual < global_context -> min_mapping_quality_score  && ! global_context -> is_paired_end_data)||( is_second_read  && max( first_read_quality_score, mapping_qual ) < global_context -> min_mapping_quality_score))
+			if(( mapping_qual < global_context -> min_mapping_quality_score  && ! global_context -> is_paired_end_mode_assign)||( is_second_read  && max( first_read_quality_score, mapping_qual ) < global_context -> min_mapping_quality_score))
 			{
 				thread_context->read_counters.unassigned_mappingquality ++;
 
+				if(global_context -> is_paired_end_mode_assign && 0==is_second_read){
+					char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
+					if(strcmp_slash(read_name,read_name2)!=0)
+						report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
+				}
+
 				if(global_context -> SAM_output_fp)
 				{
 					fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_MappingQuality\t*\tMapping_Quality=%d,%d\n", read_name, first_read_quality_score, mapping_qual);
 				}
 				return;
 			}
-			if(is_second_read==0 && global_context -> is_paired_end_data)
+			if(is_second_read==0 && global_context -> is_paired_end_mode_assign)
 			{
 				first_read_quality_score = mapping_qual;
 			}
@@ -1114,7 +1231,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 
 		}
 
-		if(is_second_read == 0 && global_context -> is_paired_end_data && 
+		if(is_second_read == 0 && global_context -> is_paired_end_mode_assign && 
 	   	  (global_context -> is_PE_distance_checked || global_context -> is_chimertc_disallowed)
 		  )
 		{
@@ -1138,6 +1255,13 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 				{
 					if(global_context -> is_PE_distance_checked && ((fragment_length > global_context -> max_paired_end_distance) || (fragment_length < global_context -> min_paired_end_distance)))
 					{
+
+						if(global_context -> is_paired_end_mode_assign && 0==is_second_read){
+							char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
+							if(strcmp_slash(read_name,read_name2)!=0)
+								report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
+						}
+
 						thread_context->read_counters.unassigned_fragmentlength ++;
 
 						if(global_context -> SAM_output_fp)
@@ -1149,6 +1273,13 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 				{
 					if(global_context -> is_chimertc_disallowed)
 					{
+
+						if(global_context -> is_paired_end_mode_assign && 0==is_second_read){
+							char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
+							if(strcmp_slash(read_name,read_name2)!=0)
+								report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
+						}
+
 						thread_context->read_counters.unassigned_chimericreads ++;
 
 						if(global_context -> SAM_output_fp)
@@ -1168,6 +1299,12 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 		{
 			if(alignment_masks & SAM_FLAG_DUPLICATE)
 			{
+				if(global_context -> is_paired_end_mode_assign && 0==is_second_read){
+					char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
+					if(strcmp_slash(read_name,read_name2)!=0)
+						report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
+				}
+
 				thread_context->read_counters.unassigned_duplicate ++;
 				if(global_context -> SAM_output_fp)
 					fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_Duplicate\t*\t*\n", read_name);
@@ -1179,6 +1316,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 
 		if(SAM_FLAG_UNMAPPED & alignment_masks) continue;
 
+		int NH_value = 1;
 		char * NH_pos = strstr(tmp_tok_ptr,"\tNH:i:");
 		if(NH_pos)
 		{
@@ -1188,16 +1326,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 				if(is_second_read && read_1_chr)
 				{
 					if((strcmp(read_1_chr, mate_chr)!=0 || mate_pos!=read_1_pos) && read_1_chr[0] != '*'  && mate_chr[0]!='*')
-					{
-				//	printf("RV:%s,%s   %d,%d\n", read_1_chr, mate_chr, mate_pos, read_1_pos);
-						if(!global_context->is_unpaired_warning_shown)
-						{
-							global_context->is_unpaired_warning_shown=1;
-							global_context->redo = 1;
-							print_in_box(80,0,0,"   Found reads that are not properly paired.");
-							print_in_box(80,0,0,"   (missing mate or mate is not the next read)");
-						}
-					}
+						report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
 				}
 				else
 				{
@@ -1214,14 +1343,37 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 
 					if(global_context -> SAM_output_fp)
 						fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_MultiMapping\t*\t*\n", read_name);
+
+					if(global_context -> is_paired_end_mode_assign && is_second_read == 0){
+						char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
+						if(strcmp_slash(read_name,read_name2)!=0)
+							report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
+					}
 					return;
 				}
 			}
+			int nh_i, NHtmpi=0;
+			for(nh_i = 6; nh_i < 15; nh_i++){
+				char nch = NH_pos[nh_i];
+				if(isdigit(nch)){
+					NHtmpi = NHtmpi * 10 + (nch-'0');
+				}else{break; }
+			}
+			NH_value = NHtmpi;
 		}
 
+
+		maximum_NH_value = max(maximum_NH_value, NH_value);
+
 		// if a pair of reads have one secondary, the entire fragment is seen as secondary.
 		if((alignment_masks & SAM_FLAG_SECONDARY_MAPPING) && (global_context -> is_multi_mapping_allowed == ALLOW_PRIMARY_MAPPING))
 		{
+			if(global_context -> is_paired_end_mode_assign && is_second_read == 0){
+				char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
+				if(strcmp_slash(read_name,read_name2)!=0)
+					report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
+			}
+
 			thread_context->read_counters.unassigned_secondary ++;
 
 			if(global_context -> SAM_output_fp)
@@ -1411,8 +1563,14 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 				if(global_context->is_split_alignments_only)
 				{
 					skipped_for_exonic ++;
-					if((is_second_read && skipped_for_exonic == 2) || (!global_context -> is_paired_end_data) || (alignment_masks & 0x8))
+					if((is_second_read && skipped_for_exonic == 2) || (!global_context -> is_paired_end_mode_assign) || (alignment_masks & 0x8))
 					{
+						if(global_context -> is_paired_end_mode_assign && is_second_read == 0){
+							char * read_name2 = strtok_r(thread_context -> line_buffer2,"\t", &tmp_tok_ptr);
+							if(strcmp_slash(read_name,read_name2)!=0)
+								report_unpair_warning(global_context, thread_context, &this_noproperly_paired_added);
+						}
+
 						if(global_context -> SAM_output_fp)
 							fprintf(global_context -> SAM_output_fp,"%s\tUnassigned_Nonjunction\t*\t*\n", read_name);
 
@@ -1434,7 +1592,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 		// both meta-feature mode and feature mode use the same strategy.
 		// two ends in a fragment is considered individually; the overlapping bases are not added up.
 		int ends;
-		for(ends = 0; ends < global_context -> is_paired_end_data + 1; ends++)
+		for(ends = 0; ends < global_context -> is_paired_end_mode_assign + 1; ends++)
 		{
 			long * hits_indices = ends?hits_indices2:hits_indices1;
 			short * hits_total_length = ends?hits_total_length2:hits_total_length1;
@@ -1479,10 +1637,12 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 		}
 	}
 
+	int fixed_fractional_count = global_context -> use_fraction_multi_mapping ?calc_fixed_fraction(maximum_NH_value): NH_FRACTION_INT;
+
 	if(nhits2+nhits1==1)
 	{
 		long hit_exon_id = nhits2?hits_indices2[0]:hits_indices1[0];
-		thread_context->count_table[hit_exon_id]++;
+		thread_context->count_table[hit_exon_id] += fixed_fractional_count;
 		thread_context->nreads_mapped_to_exon++;
 		if(global_context -> SAM_output_fp)
 		{
@@ -1495,7 +1655,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 	else if(nhits2 == 1 && nhits1 == 1 && hits_indices2[0]==hits_indices1[0])
 	{
 		long hit_exon_id = hits_indices1[0];
-		thread_context->count_table[hit_exon_id]++;
+		thread_context->count_table[hit_exon_id] += fixed_fractional_count;
 		thread_context->nreads_mapped_to_exon++;
 		if(global_context -> SAM_output_fp)
 		{
@@ -1510,7 +1670,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 
 		if(nhits2+nhits1>=MAX_HIT_NUMBER)
 		{
-			print_in_box(80,0,0,"A %s overlapped with %d features.", global_context -> is_paired_end_data?"fragment":"read", nhits2+nhits1);
+			print_in_box(80,0,0,"A %s overlapped with %d features.", global_context -> is_paired_end_mode_assign?"fragment":"read", nhits2+nhits1);
 			print_in_box(80,0,0,"Please increase MAX_HIT_NUMBER in the program");
 		}
 
@@ -1521,7 +1681,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 
 		for(is_second_read = 0; is_second_read < 2; is_second_read++)
 		{
-			if(is_second_read && !global_context -> is_paired_end_data) break;
+			if(is_second_read && !global_context -> is_paired_end_mode_assign) break;
 			long * hits_indices = is_second_read?hits_indices2:hits_indices1;
 			int nhits = is_second_read?nhits2:nhits1;
 			if (nhits<1) continue;
@@ -1632,7 +1792,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 
 			if(top_voters == 1 && (!global_context -> is_multi_overlap_allowed))
 			{
-				thread_context->count_table[top_voter_id]++;
+				thread_context->count_table[top_voter_id] += fixed_fractional_count;
 				thread_context->nreads_mapped_to_exon++;
 				if(global_context -> SAM_output_fp)
 				{
@@ -1661,7 +1821,7 @@ void process_line_buffer(fc_thread_global_context_t * global_context, fc_thread_
 						{
 							long tmp_voter_id = (global_context -> is_gene_level)?decision_table_exon_ids[xk1]:decision_table_ids[xk1];
 							//printf("TVID=%ld; HITID=%d\n", tmp_voter_id, xk1);
-							thread_context->count_table[tmp_voter_id]++;
+							thread_context->count_table[tmp_voter_id] += fixed_fractional_count;
 
 							if(global_context -> SAM_output_fp)
 							{
@@ -1743,7 +1903,7 @@ void * feature_count_worker(void * vargs)
 					//if(buffer_read_ptr>= global_context->input_buffer_max_size)
 					//	if(buffer_read_ptr>6*1024*1024) printf("REALLY BIG PTR:%u = %u + %u - %u\n", buffer_read_ptr, thread_context->input_buffer_write_ptr , global_context->input_buffer_max_size, thread_context->input_buffer_remainder);
 
-					for(is_second_read = 0; is_second_read < (global_context->is_paired_end_data ? 2:1); is_second_read++)
+					for(is_second_read = 0; is_second_read < (global_context->is_paired_end_mode_assign ? 2:1); is_second_read++)
 					{
 						char * curr_line_buff = is_second_read?thread_context -> line_buffer2:thread_context -> line_buffer1;
 						//printf("R=%u; WPTR=%u ;RPTR=%u\n", thread_context->input_buffer_remainder, thread_context->input_buffer_write_ptr, buffer_read_ptr);
@@ -1760,6 +1920,7 @@ void * feature_count_worker(void * vargs)
 								buffer_read_ptr = 0; 
 							if(nch=='\n' || buffer_read_bytes>2998){
 								curr_line_buff[buffer_read_bytes+1]=0;
+								curr_line_buff[buffer_read_bytes+2]=0;
 								break;
 							}
 						}
@@ -1861,7 +2022,7 @@ void * feature_count_worker(void * vargs)
 			while(PDATA_ptr < PDATA_len)
 			{
 				int is_second_read;
-				for(is_second_read = 0; is_second_read <= global_context -> is_paired_end_data; is_second_read++)
+				for(is_second_read = 0; is_second_read <= global_context -> is_paired_end_mode_assign; is_second_read++)
 				{
 					int binary_read_len, local_PDATA_ptr = PDATA_ptr;
 					char * curr_line_buff = is_second_read?thread_context -> line_buffer2:thread_context -> line_buffer1;
@@ -1883,11 +2044,12 @@ void * feature_count_worker(void * vargs)
 	}
 }
 
-void fc_thread_merge_results(fc_thread_global_context_t * global_context, unsigned int * nreads , unsigned long long int *nreads_mapped_to_exon, fc_read_counters * my_read_counter)
+void fc_thread_merge_results(fc_thread_global_context_t * global_context, read_count_type_t * nreads , unsigned long long int *nreads_mapped_to_exon, fc_read_counters * my_read_counter)
 {
 	int xk1, xk2;
 
 	long long int total_input_reads = 0 ;
+	read_count_type_t unpaired_fragment_no = 0;
 
 	(*nreads_mapped_to_exon)=0;
 
@@ -1899,6 +2061,7 @@ void fc_thread_merge_results(fc_thread_global_context_t * global_context, unsign
 		}
 		total_input_reads += global_context -> thread_contexts[xk1].all_reads;
 		(*nreads_mapped_to_exon) += global_context -> thread_contexts[xk1].nreads_mapped_to_exon;
+		unpaired_fragment_no += global_context -> thread_contexts[xk1].unpaired_fragment_no;
 
 		global_context -> read_counters.unassigned_ambiguous += global_context -> thread_contexts[xk1].read_counters.unassigned_ambiguous;
 		global_context -> read_counters.unassigned_nofeatures += global_context -> thread_contexts[xk1].read_counters.unassigned_nofeatures;
@@ -1931,8 +2094,11 @@ void fc_thread_merge_results(fc_thread_global_context_t * global_context, unsign
 		sprintf(pct_str,"(%.1f%%%%)", (*nreads_mapped_to_exon)*100./total_input_reads);
 	else	pct_str[0]=0;
 
-	print_in_box(80,0,0,"   Total %s : %llu", global_context -> is_paired_end_data?"fragments":"reads", total_input_reads); 
-	print_in_box(pct_str[0]?81:80,0,0,"   Successfully assigned %s : %llu %s", global_context -> is_paired_end_data?"fragments":"reads", *nreads_mapped_to_exon,pct_str); 
+	if(unpaired_fragment_no){
+		print_in_box(80,0,0,"   Not properly paired fragments : %llu", unpaired_fragment_no);
+	}
+	print_in_box(80,0,0,"   Total %s : %llu", global_context -> is_paired_end_mode_assign?"fragments":"reads", total_input_reads); 
+	print_in_box(pct_str[0]?81:80,0,0,"   Successfully assigned %s : %llu %s", global_context -> is_paired_end_mode_assign?"fragments":"reads", *nreads_mapped_to_exon,pct_str); 
 	print_in_box(80,0,0,"   Running time : %.2f minutes", (miltime() - global_context -> start_time)/60);
 	print_in_box(80,0,0,"");
 }
@@ -1977,7 +2143,7 @@ HashTable * load_alias_table(char * fname)
 	return ret;
 }
 
-void fc_thread_init_global_context(fc_thread_global_context_t * global_context, unsigned int buffer_size, unsigned short threads, int line_length , int is_PE_data, int min_pe_dist, int max_pe_dist, int is_gene_level, int is_overlap_allowed, int is_strand_checked, char * output_fname, int is_sam_out, int is_both_end_required, int is_chimertc_disallowed, int is_PE_distance_checked, char *feature_name_column, char * gene_id_column, int min_map_qual_score, int is_multi_mapping_allowed, int i [...]
+void fc_thread_init_global_context(fc_thread_global_context_t * global_context, unsigned int buffer_size, unsigned short threads, int line_length , int is_PE_data, int min_pe_dist, int max_pe_dist, int is_gene_level, int is_overlap_allowed, int is_strand_checked, char * output_fname, int is_sam_out, int is_both_end_required, int is_chimertc_disallowed, int is_PE_distance_checked, char *feature_name_column, char * gene_id_column, int min_map_qual_score, int is_multi_mapping_allowed, int i [...]
 {
 
 	global_context -> input_buffer_max_size = buffer_size;
@@ -1989,7 +2155,7 @@ void fc_thread_init_global_context(fc_thread_global_context_t * global_context,
 	global_context -> isCVersion = isCVersion;
 	global_context -> is_read_details_out = is_sam_out;
 	global_context -> is_multi_overlap_allowed = is_overlap_allowed;
-	global_context -> is_paired_end_data = is_PE_data;
+	global_context -> is_paired_end_mode_assign = is_PE_data;
 	global_context -> is_gene_level = is_gene_level;
 	global_context -> is_strand_checked = is_strand_checked;
 	global_context -> is_both_end_required = is_both_end_required;
@@ -1999,8 +2165,9 @@ void fc_thread_init_global_context(fc_thread_global_context_t * global_context,
 	global_context -> is_split_alignments_only = is_split_alignments_only;
 	global_context -> is_duplicate_ignored = is_duplicate_ignored;
 	global_context -> reduce_5_3_ends_to_one = reduce_5_3_ends_to_one;
+	global_context -> do_not_sort = is_not_sort;
 	global_context -> is_SAM_file = is_SAM;
-
+	global_context -> use_fraction_multi_mapping = use_fraction_multimapping;
 
 	global_context -> thread_number = threads;
 	global_context -> min_mapping_quality_score = min_map_qual_score;
@@ -2092,7 +2259,7 @@ int fc_thread_start_threads(fc_thread_global_context_t * global_context, int et_
 		global_context -> thread_contexts[xk1].input_buffer = malloc(global_context -> input_buffer_max_size);
 		global_context -> thread_contexts[xk1].thread_id = xk1;
 		global_context -> thread_contexts[xk1].chunk_read_ptr = 0;
-		global_context -> thread_contexts[xk1].count_table = calloc(sizeof(unsigned int), et_exons);
+		global_context -> thread_contexts[xk1].count_table = calloc(sizeof(read_count_type_t), et_exons);
 		global_context -> thread_contexts[xk1].nreads_mapped_to_exon = 0;
 		global_context -> thread_contexts[xk1].all_reads = 0;
 		global_context -> thread_contexts[xk1].line_buffer1 = malloc(global_context -> line_length + 2);
@@ -2100,6 +2267,7 @@ int fc_thread_start_threads(fc_thread_global_context_t * global_context, int et_
 		global_context -> thread_contexts[xk1].chro_name_buff = malloc(CHROMOSOME_NAME_LENGTH);
 		global_context -> thread_contexts[xk1].strm_buffer = malloc(sizeof(z_stream));
 
+		global_context -> thread_contexts[xk1].unpaired_fragment_no = 0;
 		global_context -> thread_contexts[xk1].read_counters.assigned_reads = 0;
 		global_context -> thread_contexts[xk1].read_counters.unassigned_ambiguous = 0;
 		global_context -> thread_contexts[xk1].read_counters.unassigned_nofeatures = 0;
@@ -2205,11 +2373,21 @@ int resort_input_file(fc_thread_global_context_t * global_context)
 }
 
 
-void fc_write_final_gene_results(fc_thread_global_context_t * global_context, int * et_geneid, char ** et_chr, long * et_start, long * et_stop, unsigned char * et_strand, const char * out_file, int features, unsigned int ** column_numbers, char * file_list, int n_input_files, fc_feature_info_t * loaded_features, int header_out)
+void BUFstrcat(char * targ, char * src, char ** buf){
+	int srclen = strlen(src);
+	if( (*buf) == NULL){
+		(*buf) = targ;
+	}
+	memcpy((*buf), src, srclen);
+	(*buf) += srclen;
+	(**buf) = 0;
+}
+
+void fc_write_final_gene_results(fc_thread_global_context_t * global_context, int * et_geneid, char ** et_chr, long * et_start, long * et_stop, unsigned char * et_strand, const char * out_file, int features, read_count_type_t ** column_numbers, char * file_list, int n_input_files, fc_feature_info_t * loaded_features, int header_out)
 {
 	int xk1;
 	int genes = global_context -> gene_name_table -> numOfElements;
-	unsigned int *gene_columns;
+	read_count_type_t *gene_columns;
 
 	FILE * fp_out = f_subr_open(out_file,"w");
 	if(!fp_out){
@@ -2241,7 +2419,7 @@ void fc_write_final_gene_results(fc_thread_global_context_t * global_context, in
 	}
 	fprintf(fp_out,"\n");
 
-	gene_columns = calloc(sizeof(unsigned int) , genes * non_empty_files);
+	gene_columns = calloc(sizeof(read_count_type_t) , genes * non_empty_files);
 	unsigned int * gene_exons_number = calloc(sizeof(unsigned int) , genes);
 	unsigned int * gene_exons_pointer = calloc(sizeof(unsigned int) , genes);
 	unsigned int * gene_exons_start = malloc(sizeof(unsigned int) * features);
@@ -2294,16 +2472,20 @@ void fc_write_final_gene_results(fc_thread_global_context_t * global_context, in
 	unsigned int * input_start_stop_list = malloc(longest_gene_exons * sizeof(int) * 2);
 	unsigned int * output_start_stop_list = malloc(longest_gene_exons * sizeof(int) * 2);
 
-	char * out_chr_list = malloc(longest_gene_exons * (1+global_context -> longest_chro_name) + 1);
-	char * out_start_list = malloc(11 * longest_gene_exons + 1);
-	char * out_end_list = malloc(11 * longest_gene_exons + 1);
-	char * out_strand_list = malloc(2 * longest_gene_exons + 1);
+	char * out_chr_list = malloc(longest_gene_exons * (1+global_context -> longest_chro_name) + 1), * tmp_chr_list = NULL;
+	char * out_start_list = malloc(11 * longest_gene_exons + 1), * tmp_start_list = NULL;
+	char * out_end_list = malloc(11 * longest_gene_exons + 1), * tmp_end_list = NULL;
+	char * out_strand_list = malloc(2 * longest_gene_exons + 1), * tmp_strand_list = NULL;
 
 	for(xk1 = 0 ; xk1 < genes; xk1++)
 	{
 		int xk2;
 		
 		memset(is_occupied,0,gene_exons_pointer[xk1]);
+		tmp_chr_list = NULL;
+		tmp_start_list = NULL;
+		tmp_end_list = NULL;
+		tmp_strand_list = NULL;
 		out_chr_list[0]=0;
 		out_start_list[0]=0;
 		out_end_list[0]=0;
@@ -2340,15 +2522,15 @@ void fc_write_final_gene_results(fc_thread_global_context_t * global_context, in
 						for(xk3=0; xk3<merged_gaps; xk3++)
 						{
 							char numbbuf[12];
-							strcat(out_chr_list, matched_chr);
-							strcat(out_chr_list, ";");
+							BUFstrcat(out_chr_list, matched_chr, &tmp_chr_list);
+							BUFstrcat(out_chr_list, ";", &tmp_chr_list);
 
 							sprintf(numbbuf,"%u;", output_start_stop_list[xk3 * 2]);
-							strcat(out_start_list, numbbuf);
+							BUFstrcat(out_start_list, numbbuf, &tmp_start_list);
 							sprintf(numbbuf,"%u;", output_start_stop_list[xk3 * 2 + 1] - 1);
-							strcat(out_end_list, numbbuf);
+							BUFstrcat(out_end_list, numbbuf, &tmp_end_list);
 							sprintf(numbbuf,"%c;", matched_strand?'-':'+');
-							strcat(out_strand_list, numbbuf);
+							BUFstrcat(out_strand_list, numbbuf, &tmp_strand_list);
 
 							gene_nonoverlap_len += output_start_stop_list[xk3 * 2 + 1] - output_start_stop_list[xk3 * 2];
 						}
@@ -2372,7 +2554,14 @@ void fc_write_final_gene_results(fc_thread_global_context_t * global_context, in
 		{
 			if(column_numbers[i_files])
 			{
-				fprintf(fp_out,"\t%u", gene_columns[i_files+non_empty_files*xk1]);
+				read_count_type_t longlong_res = 0;
+				double double_res = 0;
+				int is_double_number = calc_float_fraction(gene_columns[i_files+non_empty_files*xk1], &longlong_res, &double_res);
+				if(is_double_number){
+					fprintf(fp_out,"\t%.2f", double_res);
+				}else{
+					fprintf(fp_out,"\t%llu", longlong_res);
+				}
 			}
 		}
 		fprintf(fp_out,"\n");
@@ -2396,7 +2585,7 @@ void fc_write_final_gene_results(fc_thread_global_context_t * global_context, in
 	fclose(fp_out);
 }
 
-void fc_write_final_counts(fc_thread_global_context_t * global_context, const char * out_file, int nfiles, char * file_list, unsigned int ** column_numbers, fc_read_counters *read_counters, int isCVersion)
+void fc_write_final_counts(fc_thread_global_context_t * global_context, const char * out_file, int nfiles, char * file_list, read_count_type_t ** column_numbers, fc_read_counters *read_counters, int isCVersion)
 {
 	char fname[300];
 	int i_files, xk1;
@@ -2422,7 +2611,7 @@ void fc_write_final_counts(fc_thread_global_context_t * global_context, const ch
 	}
 
 	fprintf(fp_out,"\n");
-	char * keys [] ={ "Assigned" , "Unassigned_Ambiguity", "Unassigned_MultiMapping" ,"Unassigned_NoFeatures", "Unassigned_Unmapped", "Unassigned_MappingQuality", "Unassigned_FragementLength", "Unassigned_Chimera", "Unassigned_Secondary", "Unassigned_Nonjunction", "Unassigned_Duplicate"};
+	char * keys [] ={ "Assigned" , "Unassigned_Ambiguity", "Unassigned_MultiMapping" ,"Unassigned_NoFeatures", "Unassigned_Unmapped", "Unassigned_MappingQuality", "Unassigned_FragmentLength", "Unassigned_Chimera", "Unassigned_Secondary", "Unassigned_Nonjunction", "Unassigned_Duplicate"};
 
 	for(xk1=0; xk1<11; xk1++)
 	{
@@ -2432,9 +2621,7 @@ void fc_write_final_counts(fc_thread_global_context_t * global_context, const ch
 			unsigned long long * array_0 = (unsigned long long *)&(read_counters[i_files]);
 			unsigned long long * cntr = array_0 + xk1;
 			if(column_numbers[i_files])
-			{
-				fprintf(fp_out,"\t%llu", *(cntr));
-			}
+				fprintf(fp_out,"\t%llu", *cntr);
 		}
 		fprintf(fp_out,"\n");
 	}
@@ -2442,7 +2629,7 @@ void fc_write_final_counts(fc_thread_global_context_t * global_context, const ch
 
 	fclose(fp_out);
 }
-void fc_write_final_results(fc_thread_global_context_t * global_context, const char * out_file, int features, unsigned int ** column_numbers, char * file_list, int n_input_files, fc_feature_info_t * loaded_features, int header_out)
+void fc_write_final_results(fc_thread_global_context_t * global_context, const char * out_file, int features, read_count_type_t ** column_numbers, char * file_list, int n_input_files, fc_feature_info_t * loaded_features, int header_out)
 {
 	/* save the results */
 	FILE * fp_out;
@@ -2484,7 +2671,17 @@ void fc_write_final_results(fc_thread_global_context_t * global_context, const c
 			if(column_numbers[i_files])
 			{
 				int sorted_exon_no = loaded_features[i].sorted_order;
-				fprintf(fp_out,"\t%d", column_numbers[i_files][sorted_exon_no]);
+				unsigned long long count_frac_raw =  column_numbers[i_files][sorted_exon_no], longlong_res = 0;
+
+				double double_res = 0;
+				int is_double_number = calc_float_fraction(count_frac_raw, &longlong_res, &double_res);
+				if(is_double_number){
+					fprintf(fp_out,"\t%.2f", double_res);
+				}else{
+					fprintf(fp_out,"\t%llu", longlong_res);
+				}
+
+
 			}
 		}
 		fprintf(fp_out,"\n");
@@ -2503,6 +2700,8 @@ static struct option long_options[] =
 	{"countSplitAlignmentsOnly", no_argument, 0, 0},
 	{"debugCommand", required_argument, 0, 0},
 	{"ignoreDup", no_argument, 0, 0},
+	{"donotsort", no_argument, 0, 0},
+	{"fraction", no_argument, 0, 0},
 	{0, 0, 0, 0}
 };
 
@@ -2584,11 +2783,10 @@ void print_usage()
 	SUBREADputs("              \tthe file is the same as name of the input read file except a");
 	SUBREADputs("              \tsuffix `.featureCounts' is added.");
 	SUBREADputs("    "); 
-	SUBREADputs("    --primary \tIf specified, only primary alignments will be counted. Primary");
-	SUBREADputs("              \tand secondary alignments are identified using bit 0x100 in the");
-	SUBREADputs("              \tFlag field of SAM/BAM files. All primary alignments in a dataset");
-	SUBREADputs("              \twill be counted no matter they are from multi-mapping reads or");
-	SUBREADputs("              \tnot ('-M' is ignored). ");
+	SUBREADputs("    --minReadOverlap <int>      Specify the minimum number of overlapped bases");
+	SUBREADputs("              \trequired to assign a read to a feature. 1 by default. Negative");
+	SUBREADputs("              \tvalues are permitted, indicating a gap being allowed between a");
+	SUBREADputs("              \tread and a feature.");
 	SUBREADputs("    "); 
 	SUBREADputs("    --readExtension5 <int>      Reads are extended upstream by <int> bases from");
 	SUBREADputs("              \ttheir 5' end."); 
@@ -2596,26 +2794,32 @@ void print_usage()
 	SUBREADputs("    --readExtension3 <int>      Reads are extended upstream by <int> bases from");
 	SUBREADputs("              \ttheir 3' end."); 
 	SUBREADputs("    "); 
-	SUBREADputs("    --minReadOverlap <int>      Specify the minimum number of overlapped bases");
-	SUBREADputs("              \trequired to assign a read to a feature. 1 by default. Negative");
-	SUBREADputs("              \tvalues are permitted, indicating a gap being allowed between a");
-	SUBREADputs("              \tread and a feature.");
-	SUBREADputs("    "); 
-	SUBREADputs("    --countSplitAlignmentsOnly  If specified, only split alignments (CIGAR");
-	SUBREADputs("              \tstrings containing letter `N') will be counted. All the other");
-	SUBREADputs("              \talignments will be ignored. An example of split alignments is");
-	SUBREADputs("              \tthe exon-spanning reads in RNA-seq data.");
-	SUBREADputs("    "); 
 	SUBREADputs("    --read2pos <5:3>            The read is reduced to its 5' most base or 3'");
 	SUBREADputs("              \tmost base. Read summarization is then performed based on the");
 	SUBREADputs("              \tsingle base which the read is reduced to."); 
 	SUBREADputs("    "); 
+	SUBREADputs("    --fraction\tIf specified, a fractional count 1/n will be generated for each");
+	SUBREADputs("              \tmulti-mapping read, where n is the number of alignments (indica-");
+	SUBREADputs("              \tted by 'NH' tag) reported for the read. This option must be used");
+	SUBREADputs("              \ttogether with the '-M' option.");
+	SUBREADputs("    "); 
+	SUBREADputs("    --primary \tIf specified, only primary alignments will be counted. Primary");
+	SUBREADputs("              \tand secondary alignments are identified using bit 0x100 in the");
+	SUBREADputs("              \tFlag field of SAM/BAM files. All primary alignments in a dataset");
+	SUBREADputs("              \twill be counted no matter they are from multi-mapping reads or");
+	SUBREADputs("              \tnot ('-M' is ignored). ");
+	SUBREADputs("    "); 
 	SUBREADputs("    --ignoreDup                 If specified, reads that were marked as");
 	SUBREADputs("              \tduplicates will be ignored. Bit Ox400 in FLAG field of SAM/BAM");
 	SUBREADputs("              \tfile is used for identifying duplicate reads. In paired end");
 	SUBREADputs("              \tdata, the entire read pair will be ignored if at least one end");
 	SUBREADputs("              \tis found to be a duplicate read.");
 	SUBREADputs("    "); 
+	SUBREADputs("    --countSplitAlignmentsOnly  If specified, only split alignments (CIGAR");
+	SUBREADputs("              \tstrings containing letter `N') will be counted. All the other");
+	SUBREADputs("              \talignments will be ignored. An example of split alignments is");
+	SUBREADputs("              \tthe exon-spanning reads in RNA-seq data.");
+	SUBREADputs("    "); 
 	SUBREADputs("    Optional paired-end parameters:"); 
 	SUBREADputs("    "); 
 	SUBREADputs("    -p        \tIf specified, fragments (or templates) will be counted instead");
@@ -2643,13 +2847,17 @@ void print_usage()
 	SUBREADputs("    "); 
 	SUBREADputs("    -v        \tOutput version of the program.");
 	SUBREADputs("    "); 
+	SUBREADputs("    --donotsort   If specified, paired end reads will not be reordered even if");
+        SUBREADputs("              \treads from the same pair were found not to be next to each other");
+        SUBREADputs("              \tin the input. ");
+	SUBREADputs("    "); 
 
 
 }
 
 
 
-int readSummary_single_file(fc_thread_global_context_t * global_context, unsigned int * column_numbers, int nexons,  int * geneid, char ** chr, long * start, long * stop, unsigned char * sorted_strand, char * anno_chr_2ch, char ** anno_chrs, long * anno_chr_head, long * block_end_index, long * block_min_start , long * block_max_end, fc_read_counters * my_read_counter);
+int readSummary_single_file(fc_thread_global_context_t * global_context, read_count_type_t * column_numbers, int nexons,  int * geneid, char ** chr, long * start, long * stop, unsigned char * sorted_strand, char * anno_chr_2ch, char ** anno_chrs, long * anno_chr_head, long * block_end_index, long * block_min_start , long * block_max_end, fc_read_counters * my_read_counter);
 
 int readSummary(int argc,char *argv[]){
 
@@ -2691,6 +2899,7 @@ int readSummary(int argc,char *argv[]){
 	29: as.numeric(reduce_5_3_ends_to_one) # 0= no reduction; 1= reduce to 5' end; 2= reduce to 3' end
 	30: debug_command # This is for debug only; RfeatureCounts should pass a space (" ") to this parameter, disabling the debug command.
 	31: as.numeric(is_duplicate_ignored) # 0 = INCLUDE DUPLICATE READS; 1 = IGNORE DUPLICATE READS (0x400 FLAG IS SET) ; "0" by default.
+	32: as.numeric(do_not_sort)   # 1 = NEVER SORT THE PE BAM/SAM FILES; 0 = SORT THE BAM/SAM FILE IF IT IS FOUND NOT SORTED.
 	 */
 
 	int isStrandChecked, isCVersion, isChimericDisallowed, isPEDistChecked, minMappingQualityScore=0, isInputFileResortNeeded, feature_block_size = 20, reduce_5_3_ends_to_one;
@@ -2711,7 +2920,7 @@ int readSummary(int argc,char *argv[]){
 	curpos = 0;
 	curchr_name = "";
 
-	int isPE, minPEDistance, maxPEDistance, isReadSummaryReport, isBothEndRequired, isMultiMappingAllowed, fiveEndExtension, threeEndExtension, minReadOverlap, isSplitAlignmentOnly, is_duplicate_ignored;
+	int isPE, minPEDistance, maxPEDistance, isReadSummaryReport, isBothEndRequired, isMultiMappingAllowed, fiveEndExtension, threeEndExtension, minReadOverlap, isSplitAlignmentOnly, is_duplicate_ignored, doNotSort, fractionMultiMapping;
 
 	int isSAM, isGTF, n_input_files=0;
 	char *  alias_file_name = NULL, * cmd_rebuilt = NULL;
@@ -2812,12 +3021,28 @@ int readSummary(int argc,char *argv[]){
 	else
 		is_duplicate_ignored = 0;
 
+	if(argc>32)
+		doNotSort = atoi(argv[32]);
+	else
+		doNotSort = 0;
+
+	if(argc>33)
+		fractionMultiMapping = atoi(argv[33]);
+	else
+		fractionMultiMapping = 0;
+
 
 	unsigned int buffer_size = 1024*1024*6;
 
 
 	fc_thread_global_context_t global_context;
-	fc_thread_init_global_context(& global_context, buffer_size, thread_number, MAX_LINE_LENGTH, isPE, minPEDistance, maxPEDistance,isGeneLevel, isMultiOverlapAllowed, isStrandChecked, (char *)argv[3] , isReadSummaryReport, isBothEndRequired, isChimericDisallowed, isPEDistChecked, nameFeatureTypeColumn, nameGeneIDColumn, minMappingQualityScore,isMultiMappingAllowed, isSAM, alias_file_name, cmd_rebuilt, isInputFileResortNeeded, feature_block_size, isCVersion, fiveEndExtension, threeEndExtens [...]
+	fc_thread_init_global_context(& global_context, buffer_size, thread_number, MAX_LINE_LENGTH, isPE, minPEDistance, maxPEDistance,isGeneLevel, isMultiOverlapAllowed, isStrandChecked, (char *)argv[3] , isReadSummaryReport, isBothEndRequired, isChimericDisallowed, isPEDistChecked, nameFeatureTypeColumn, nameGeneIDColumn, minMappingQualityScore,isMultiMappingAllowed, isSAM, alias_file_name, cmd_rebuilt, isInputFileResortNeeded, feature_block_size, isCVersion, fiveEndExtension, threeEndExtens [...]
+
+	if( global_context.is_multi_mapping_allowed != ALLOW_ALL_MULTI_MAPPING && global_context.use_fraction_multi_mapping)
+	{
+		SUBREADprintf("ERROR: '--fraction' option should be used together with '-M'.\nThis option should not be used when multi-mapping reads are disallowed or the primary mapping is only counted.\n");
+		return -1;
+	}
 	print_FC_configuration(&global_context, argv[1], argv[2], argv[3], global_context.is_SAM_file, isGTF, & n_input_files, isReadSummaryReport);
 
 
@@ -2860,14 +3085,14 @@ int readSummary(int argc,char *argv[]){
 	char * file_list_used = malloc(strlen(argv[2])+1);
 	strcpy(file_list_used, argv[2]);
 	char * next_fn = strtok_r(file_list_used,";", &tmp_pntr);
-	unsigned int ** table_columns = calloc( n_input_files , sizeof(int *)), i_files=0;
+	read_count_type_t ** table_columns = calloc( n_input_files , sizeof(read_count_type_t *)), i_files=0;
 	fc_read_counters * read_counters = calloc(n_input_files , sizeof(fc_read_counters)); 
 
 	for(;;){
-		int redoing, original_sorting = global_context.is_input_file_resort_needed, orininal_isPE = global_context.is_paired_end_data;
+		int redoing, original_sorting = global_context.is_input_file_resort_needed, orininal_isPE = global_context.is_paired_end_mode_assign;
 		if(next_fn==NULL || strlen(next_fn)<1) break;
 
-		unsigned int * column_numbers = calloc(nexons, sizeof(unsigned int ));
+		read_count_type_t * column_numbers = calloc(nexons, sizeof(read_count_type_t));
 
 		strcpy(global_context.input_file_name, next_fn);
 		strcpy(global_context.raw_input_file_name, next_fn);
@@ -2889,11 +3114,11 @@ int readSummary(int argc,char *argv[]){
 			if(redoing || !global_context.redo) break;
 			
 			global_context.is_input_file_resort_needed = 1;
-			memset(column_numbers, 0, nexons * sizeof(unsigned int ));
+			memset(column_numbers, 0, nexons * sizeof(read_count_type_t));
 		}
 		global_context.is_SAM_file = isSAM;
 		global_context.is_input_file_resort_needed = original_sorting;
-		global_context.is_paired_end_data = orininal_isPE;
+		global_context.is_paired_end_mode_assign = orininal_isPE;
 
 		i_files++;
 		next_fn = strtok_r(NULL, ";", &tmp_pntr);
@@ -2908,8 +3133,12 @@ int readSummary(int argc,char *argv[]){
 
 	fc_write_final_counts(&global_context, argv[3], n_input_files, argv[2], table_columns, read_counters, isCVersion);
 
+	int total_written_coulmns = 0;
 	for(i_files=0; i_files<n_input_files; i_files++)
-		if(table_columns[i_files]) free(table_columns[i_files]);
+		if(table_columns[i_files]){
+			free(table_columns[i_files]);
+			total_written_coulmns++;
+		}
 	free(table_columns);
 
 
@@ -2955,7 +3184,7 @@ int readSummary(int argc,char *argv[]){
 	free(nreads);
 
 
-	return 0;
+	return total_written_coulmns?0:-1;
 }
 
 
@@ -2970,7 +3199,7 @@ int readSummary(int argc,char *argv[]){
 
 
 
-int readSummary_single_file(fc_thread_global_context_t * global_context, unsigned int * column_numbers, int nexons,  int * geneid, char ** chr, long * start, long * stop, unsigned char * sorted_strand, char * anno_chr_2ch, char ** anno_chrs, long * anno_chr_head, long * block_end_index, long * block_min_start , long * block_max_end, fc_read_counters * my_read_counter)
+int readSummary_single_file(fc_thread_global_context_t * global_context, read_count_type_t * column_numbers, int nexons,  int * geneid, char ** chr, long * start, long * stop, unsigned char * sorted_strand, char * anno_chr_2ch, char ** anno_chrs, long * anno_chr_head, long * block_end_index, long * block_min_start , long * block_max_end, fc_read_counters * my_read_counter)
 {
 	FILE *fp_in = NULL;
 	int read_length = 0;
@@ -2982,10 +3211,12 @@ int readSummary_single_file(fc_thread_global_context_t * global_context, unsigne
 	{
 		int file_probe = is_certainly_bam_file(global_context->input_file_name, &is_first_read_PE);
 
+		
+		global_context -> is_paired_end_input_file = is_first_read_PE;
 		// a Singel-end SAM/BAM file cannot be assigned as a PE SAM/BAM file;
 		// but a PE SAM/BAM file may be assigned as a SE file if the user wishes to do so.
 		if(is_first_read_PE==0)
-				global_context -> is_paired_end_data = 0;
+				global_context -> is_paired_end_mode_assign = 0;
 
 		if(file_probe == 1){
 			global_context->is_SAM_file = 0;
@@ -3046,7 +3277,7 @@ int readSummary_single_file(fc_thread_global_context_t * global_context, unsigne
 	// begin to load-in the data.
 	if(!global_context->redo)
 	{
-		if( global_context->is_paired_end_data)
+		if( global_context->is_paired_end_mode_assign)
 		{
 			print_in_box(80,0,0,"   Assign fragments (read pairs) to features...");
 //				print_in_box(80,0,0,"   Each fragment is counted no more than once.");
@@ -3060,7 +3291,7 @@ int readSummary_single_file(fc_thread_global_context_t * global_context, unsigne
 	fc_thread_start_threads(global_context, nexons, geneid, chr, start, stop, sorted_strand, anno_chr_2ch, anno_chrs, anno_chr_head, block_end_index, block_min_start , block_max_end, read_length);
 
 	int buffer_pairs = global_context -> thread_number>1?512:1;
-	int isPE = global_context->is_paired_end_data;
+	int isPE = global_context->is_paired_end_mode_assign;
 	char * preload_line = malloc(sizeof(char) * (2+MAX_LINE_LENGTH)*(isPE?2:1)*buffer_pairs);
 	int preload_line_ptr;
 	int current_thread_id = 0;
@@ -3142,16 +3373,26 @@ int readSummary_single_file(fc_thread_global_context_t * global_context, unsigne
 					read_length = read_len_tmp;
 					global_context->read_length = read_length;
 				}
+				lbuf[strlen(lbuf)+1]=0;
 			}
 
 			//printf("RRR=%d\n",ret);
-			if(!ret) break;
 			
 			//one_thread_context -> current_read_length1 = global_context->read_length;
 			//one_thread_context -> current_read_length2 = global_context->read_length;
 
-			global_context->all_reads ++;
-			process_line_buffer(global_context, one_thread_context);
+			if(is_second_read == 1 && isPE){
+				print_in_box(85,0,0,"   %c[31mThere are odd number of reads in the paired-end data.", CHAR_ESC);
+				print_in_box(80,0,0,"   Please make sure that the format is correct.");
+			}
+
+			if(ret)
+			{
+				global_context->all_reads ++;
+				process_line_buffer(global_context, one_thread_context);
+			}
+
+			if(!ret)break;
 		}
 		else if(!isSAM)
 		{
@@ -3350,11 +3591,22 @@ int readSummary_single_file(fc_thread_global_context_t * global_context, unsigne
 			if(isPE && (fresh_read_no%2>0))
 			{
 				// Safegarding -- it should not happen if the SAM file has a correct format.
-				line_length = 0;
+				//line_length = 0;
 				if( (!global_context -> redo)){
-					print_in_box(80,0,0,"   There are odd number of reads in the paired-end data.");
+					print_in_box(85,0,0,"   %c[31mThere are odd number of reads in the paired-end data.", CHAR_ESC);
 					print_in_box(80,0,0,"   Please make sure that the format is correct.");
 				}
+				if(line_length > 0){
+					int xx1, enters = 0;
+					for(xx1 = line_length; xx1 >=0; xx1--){
+						if( preload_line[xx1]=='\n' ) enters ++;
+						if(2 == enters){
+							line_length = xx1+1;
+							break;
+						}
+					}
+					if(xx1 <= 0) line_length = 0;
+				}
 			}
 
 			//printf("FRR=%d\n%s\n", fresh_read_no, preload_line);
@@ -3440,7 +3692,7 @@ int main(int argc, char ** argv)
 int feature_count_main(int argc, char ** argv)
 #endif
 {
-	char * Rargv[32];
+	char * Rargv[34];
 	char annot_name[300];
 	char * out_name = malloc(300);
 	char * alias_file_name = malloc(300);
@@ -3472,7 +3724,9 @@ int feature_count_main(int argc, char ** argv)
 	int is_Multi_Mapping_Allowed = 0;
 	int is_Split_Alignment_Only = 0;
 	int is_duplicate_ignored = 0;
+	int do_not_sort = 0;
 	int reduce_5_3_ends_to_one = 0;
+	int use_fraction_multimapping = 0;
 	int threads = 1;
 	int isGTF = 1;
 	char nthread_str[4];
@@ -3624,6 +3878,11 @@ int feature_count_main(int argc, char ** argv)
 					is_duplicate_ignored = 1 ;
 				}
 
+				if(strcmp("fraction", long_options[option_index].name)==0)
+				{
+					use_fraction_multimapping = 1;
+				}
+
 				if(strcmp("read2pos", long_options[option_index].name)==0)
 				{
 					if(optarg[0]=='3')
@@ -3633,6 +3892,11 @@ int feature_count_main(int argc, char ** argv)
 						
 				}				
 
+				if(strcmp("donotsort", long_options[option_index].name)==0)
+				{
+					do_not_sort = 1;
+				}
+
 				if(strcmp("countSplitAlignmentsOnly", long_options[option_index].name)==0)
 				{
 					is_Split_Alignment_Only = 1;
@@ -3717,14 +3981,16 @@ int feature_count_main(int argc, char ** argv)
 	Rargv[29] = (reduce_5_3_ends_to_one == 0?"0":(reduce_5_3_ends_to_one==REDUCE_TO_3_PRIME_END?"3":"5"));
 	Rargv[30] = debug_command;
 	Rargv[31] = is_duplicate_ignored?"1":"0";
-	readSummary(32, Rargv);
+	Rargv[32] = do_not_sort?"1":"0";
+	Rargv[33] = use_fraction_multimapping?"1":"0";
+	int retvalue = readSummary(34, Rargv);
 
 	free(very_long_file_names);
 	free(out_name);
 	free(alias_file_name);
 	free(cmd_rebuilt);
 
-	return 0;
+	return retvalue;
 
 }
 
diff --git a/src/subread.h b/src/subread.h
index 9e43cc8..00f8b03 100644
--- a/src/subread.h
+++ b/src/subread.h
@@ -32,6 +32,9 @@
 
 #include "hashtable.h" 
 
+#define PRINT_BOX_NOCOLOR_FOR_COLON 2
+#define PRINT_BOX_CENTER 1
+
 #define SAM_FLAG_PAIRED_TASK	0x01
 #define SAM_FLAG_FIRST_READ_IN_PAIR 0x40
 #define SAM_FLAG_SECOND_READ_IN_PAIR 0x80
diff --git a/test/subread-align/test-tmp.log b/test/subread-align/test-tmp.log
index 3feaeb3..268b047 100644
--- a/test/subread-align/test-tmp.log
+++ b/test/subread-align/test-tmp.log
@@ -1,47 +1,49 @@
+a87c3f05f25477325ea8f2742f05231b  ../small1.00.b.array
+b5bf8ea4c82873ab98cc52e334c5e53b  ../small1.00.b.tab
 *************************************************
 *** SINGLE-END READS NO ERROR              ******
 *************************************************
 
-unmatched= 563 ;   matched= 19435 ;     unmapped= 0 ;     reads= 19998     ;NN= 0
-accuracy= 0.971847184718  ;    sensitivity= 1.0
+unmatched= 549 ;   matched= 19418 ;     unmapped= 31 ;     reads= 19998     ;NN= 0
+accuracy= 0.972504632644  ;    sensitivity= 0.998449844984
 paired_match= 0   ;     paired= 0.0
 *************************************************
 *** SINGLE-END READS NO ERROR NO DUP       ******
 *************************************************
 
-unmatched= 57 ;   matched= 18791 ;     unmapped= 1150 ;     reads= 19998     ;NN= 0
-accuracy= 0.996975806452  ;    sensitivity= 0.942494249425
+unmatched= 43 ;   matched= 18774 ;     unmapped= 1181 ;     reads= 19998     ;NN= 0
+accuracy= 0.997714832332  ;    sensitivity= 0.940944094409
 paired_match= 0   ;     paired= 0.0
 
 *************************************************
 ***  READS WITH NO ERROR                   ******
 *************************************************
 
-unmatched= 630 ;   matched= 39366 ;     unmapped= 0 ;     reads= 39996     ;NN= 0
-accuracy= 0.984248424842  ;    sensitivity= 1.0
-paired_match= 39308   ;     paired= 0.998526647361
+unmatched= 603 ;   matched= 39342 ;     unmapped= 51 ;     reads= 39996     ;NN= 0
+accuracy= 0.984904243335  ;    sensitivity= 0.998724872487
+paired_match= 39260   ;     paired= 0.997915713487
 
 *************************************************
 ***  READS NO ERROR, NO DUPLICATED REPORT  ******
 *************************************************
 
-unmatched= 188 ;   matched= 38790 ;     unmapped= 1018 ;     reads= 39996     ;NN= 0
-accuracy= 0.995176766381  ;    sensitivity= 0.974547454745
-paired_match= 38710   ;     paired= 0.997937612787
+unmatched= 165 ;   matched= 38786 ;     unmapped= 1045 ;     reads= 39996     ;NN= 0
+accuracy= 0.9957639085  ;    sensitivity= 0.973872387239
+paired_match= 38690   ;     paired= 0.997524880111
 
 *************************************************
 ***  READS WITH ONLY SEQUENCING ERROR      ******
 *************************************************
 
-unmatched= 920 ;   matched= 39068 ;     unmapped= 12 ;     reads= 40000     ;NN= 0
-accuracy= 0.976993097929  ;    sensitivity= 0.9997
-paired_match= 38848   ;     paired= 0.994368792874
+unmatched= 655 ;   matched= 33542 ;     unmapped= 5803 ;     reads= 40000     ;NN= 0
+accuracy= 0.980846273065  ;    sensitivity= 0.854925
+paired_match= 28692   ;     paired= 0.855405163675
 
 *************************************************
 ***  READS WITH SEQUENCING ERROR AND MUTATION ***
 ***  SUBREAD IS RUN WITH LONG INDEL DETECTION ***
 *************************************************
 
-unmatched= 862 ;   matched= 39131 ;     unmapped= 7 ;     reads= 40002     ;NN= 0
-accuracy= 0.97844622809  ;    sensitivity= 0.999775011249
-paired_match= 38902   ;     paired= 0.994147862309
+unmatched= 628 ;   matched= 33205 ;     unmapped= 6167 ;     reads= 40002     ;NN= 0
+accuracy= 0.981438240771  ;    sensitivity= 0.845782710864
+paired_match= 28042   ;     paired= 0.84451136877

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/subread.git



More information about the debian-med-commit mailing list