[med-svn] [libssw] 01/03: Imported Upstream version 1.1

Sascha Steinbiss satta at debian.org
Mon Aug 22 18:25:51 UTC 2016


This is an automated email from the git hooks/post-receive script.

satta pushed a commit to branch master
in repository libssw.

commit f863fae12dca70c124d2011a075e8d8b9cd3609a
Author: Sascha Steinbiss <satta at debian.org>
Date:   Mon Aug 22 18:03:48 2016 +0000

    Imported Upstream version 1.1
---
 README.md       |   5 ++-
 src/Makefile    |   2 +-
 src/main.c      |   9 ++--
 src/ssw.c       | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
 src/ssw.h       |  56 ++++++++++++------------
 src/ssw_cpp.cpp |   1 -
 6 files changed, 163 insertions(+), 42 deletions(-)

diff --git a/README.md b/README.md
index 83e67b7..3534f6a 100644
--- a/README.md
+++ b/README.md
@@ -22,11 +22,14 @@ Wan-Ping Lee <wanping.lee at gmail.com>
 Yongan Zhao <zhaoyanswill at gmail.com>   
 Daniel Cameron <cameron.d at wehi.edu.au>  
 
-Last revision: 04/05/2016
+Last revision: 06/29/2016
 
 ##Overview
 SSW is a fast implementation of the Smith-Waterman algorithm, which uses the Single-Instruction Multiple-Data (SIMD) instructions to parallelize the algorithm at the instruction level. SSW library provides an API that can be flexibly used by programs written in C, C++ and other languages. We also provide a software that can do protein and genome alignment directly. Current version of our implementation is ~50 times faster than an ordinary Smith-Waterman. It can return the Smith-Waterman  [...]
 
+The Debian package of this library can be achieved here:
+https://tracker.debian.org/pkg/libssw
+
 Note: When SSW open a gap, the gap open penalty alone is applied.
 
 ## C/C++ interface
diff --git a/src/Makefile b/src/Makefile
index f726378..f4e763b 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,6 +1,6 @@
 CC = gcc
 CXX = g++
-CFLAGS := -Wall -pipe -O3
+CFLAGS := -Wall -pipe -O2
 CXXFLAGS := $(CFLAGS)
 LOBJS = ssw.o
 LCPPOBJS = ssw_cpp.o
diff --git a/src/main.c b/src/main.c
index 2ed7cc1..b03c219 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,7 +1,7 @@
 /*  main.c
  *  Created by Mengyao Zhao on 06/23/11.
- *	Version 0.1.5
- *  Last revision by Mengyao Zhao on 06/24/16.
+ *	Version 1.0
+ *  Last revision by Mengyao Zhao on 07/19/16.
  */
 
 #include <stdlib.h>
@@ -56,7 +56,7 @@ static void reverse_comple(const char* seq, char* rc) {
 	if (start == end) rc[start] = (char)rc_table[(int8_t)seq[start]];
 }
 
-static void ssw_write (const s_align* a,
+static void ssw_write (s_align* a,
 			const kseq_t* ref_seq,
 			const kseq_t* read,
 			const char* read_seq,	// strand == 0: original read; strand == 1: reverse complement read
@@ -64,6 +64,7 @@ static void ssw_write (const s_align* a,
 			int8_t strand,	// 0: forward aligned ; 1: reverse complement aligned
 			int8_t sam) {	// 0: Blast like output; 1: Sam format output
 
+	int32_t mismatch;
 	if (sam == 0) {	// Blast like output
 		fprintf(stdout, "target_name: %s\nquery_name: %s\noptimal_alignment_score: %d\t", ref_seq->name.s, read->name.s, a->score1);
 		if (a->score2 > 0) fprintf(stdout, "suboptimal_alignment_score: %d\t", a->score2);
@@ -161,11 +162,13 @@ end:
 			if (strand) fprintf(stdout, "16\t");
 			else fprintf(stdout, "0\t");
 			fprintf(stdout, "%s\t%d\t%d\t", ref_seq->name.s, a->ref_begin1 + 1, mapq);
+			mismatch = mark_mismatch(a->ref_begin1, a->read_begin1, a->read_end1, ref_seq->seq.s, read_seq, read->seq.l, &a->cigar, &a->cigarLen);
 			for (c = 0; c < a->cigarLen; ++c) {
 				char letter = cigar_int_to_op(a->cigar[c]);
 				uint32_t length = cigar_int_to_len(a->cigar[c]);
 				fprintf(stdout, "%lu%c", (unsigned long)length, letter);
 			}
+	//		fprintf(stderr, "%s\tmismatch: %d\n", read->name.s, mismatch);
 			fprintf(stdout, "\t*\t0\t0\t");
 			for (c = a->read_begin1; c <= a->read_end1; ++c) fprintf(stdout, "%c", read_seq[c]);
 			fprintf(stdout, "\t");
diff --git a/src/ssw.c b/src/ssw.c
index 825c39b..b1fa2d1 100644
--- a/src/ssw.c
+++ b/src/ssw.c
@@ -31,7 +31,7 @@
  *  Created by Mengyao Zhao on 6/22/10.
  *  Copyright 2010 Boston College. All rights reserved.
  *	Version 0.1.4
- *	Last revision by Mengyao Zhao on 02/11/16.
+ *	Last revision by Mengyao Zhao on 07/19/16.
  *
  */
 
@@ -529,6 +529,37 @@ end:
 	return bests;
 }
 
+/*!     @function               Produce CIGAR 32-bit unsigned integer from CIGAR operation and CIGAR length
+        @param  length          length of CIGAR
+        @param  op_letter       CIGAR operation character ('M', 'I', etc)
+        @return                 32-bit unsigned integer, representing encoded CIGAR operation and length
+*/
+uint32_t to_cigar_int (uint32_t length, char op_letter)
+{
+        switch (op_letter) {
+                case 'M': /* alignment match (can be a sequence match or mismatch */
+                default:
+                        return length << BAM_CIGAR_SHIFT;
+                case 'S': /* soft clipping (clipped sequences present in SEQ) */
+                        return (length << BAM_CIGAR_SHIFT) | (4u);
+                case 'D': /* deletion from the reference */
+                        return (length << BAM_CIGAR_SHIFT) | (2u);
+                case 'I': /* insertion to the reference */
+                        return (length << BAM_CIGAR_SHIFT) | (1u);
+                case 'H': /* hard clipping (clipped sequences NOT present in SEQ) */
+                        return (length << BAM_CIGAR_SHIFT) | (5u);
+                case 'N': /* skipped region from the reference */
+                        return (length << BAM_CIGAR_SHIFT) | (3u);
+                case 'P': /* padding (silent deletion from padded reference) */
+                        return (length << BAM_CIGAR_SHIFT) | (6u);
+                case '=': /* sequence match */
+                        return (length << BAM_CIGAR_SHIFT) | (7u);
+                case 'X': /* sequence mismatch */
+                        return (length << BAM_CIGAR_SHIFT) | (8u);
+        }
+        return (uint32_t)-1; // This never happens
+}
+
 static cigar* banded_sw (const int8_t* ref,
 				 const int8_t* read,
 				 int32_t refLen,
@@ -855,13 +886,98 @@ void align_destroy (s_align* a) {
 	free(a->cigar);
 	free(a);
 }
-/*
-inline char cigar_int_to_op(uint32_t cigar_int) {
-	return UNLIKELY((cigar_int & 0xfU) > 8) ? 'M': MAPSTR[cigar_int & 0xfU];
+
+uint32_t* add_cigar (uint32_t* new_cigar, int32_t* p, int32_t* s, uint32_t length, char op) {
+	if ((*p) >= (*s)) {
+		++(*s);
+		kroundup32(*s);
+		new_cigar = (uint32_t*)realloc(new_cigar, (*s)*sizeof(uint32_t));
+	}
+	new_cigar[(*p) ++] = to_cigar_int(length, op);
+	return new_cigar;
 }
 
+uint32_t* store_previous_m (int8_t choice,	// 0: current not M, 1: current match, 2: current mismatch
+					   uint32_t* length_m,
+					   uint32_t* length_x,
+					   int32_t* p,
+					   int32_t* s,
+					   uint32_t* new_cigar) {
+
+	if ((*length_m) && (choice == 2 || !choice)) {
+		new_cigar = add_cigar (new_cigar, p, s, (*length_m), '='); 
+		(*length_m) = 0;
+	} else if ((*length_x) && (choice == 1 || !choice)) { 
+		new_cigar = add_cigar (new_cigar, p, s, (*length_x), 'X'); 
+		(*length_x) = 0;
+	}
+	return new_cigar;
+}				
+
+/*! @function:
+     1. Calculate the number of mismatches.
+     2. Modify the cigar string:
+         differentiate matches (=) and mismatches(X); add softclip(S) at the beginning and ending of the original cigar.
+    @return:
+     The number of mismatches.
+	 The cigar and cigarLen are modified.
+*/
+int32_t mark_mismatch (int32_t ref_begin1,
+					   int32_t read_begin1,
+					   int32_t read_end1,
+					   const char* ref,
+					   const char* read,
+					   int32_t readLen,
+					   uint32_t** cigar,
+					   int32_t* cigarLen) {
+
+	int32_t mismatch_length = 0, p = 0, i, length, j, s = *cigarLen + 2;
+	uint32_t *new_cigar = (uint32_t*)malloc(s*sizeof(uint32_t)), length_m = 0,  length_x = 0;
+	char op;
+
+	ref += ref_begin1;
+	read += read_begin1;
+	if (read_begin1 > 0) new_cigar[p ++] = to_cigar_int(read_begin1, 'S');
+	for (i = 0; i < (*cigarLen); ++i) {
+		op = cigar_int_to_op((*cigar)[i]);
+		length = cigar_int_to_len((*cigar)[i]);
+		if (op == 'M') {
+			for (j = 0; j < length; ++j) {
+				fprintf(stderr, "ref[%d]: %c\tread[%d]: %c\n", j, *ref, j, *read);
+				if (*ref != *read) {
+					++ mismatch_length;
+					fprintf(stderr, "length_m: %d\n", length_m);
+					// the previous is match; however the current one is mismatche
+					new_cigar = store_previous_m (2, &length_m, &length_x, &p, &s, new_cigar);			
+					++ length_x;
+				} else {
+					// the previous is mismatch; however the current one is matche
+					new_cigar = store_previous_m (1, &length_m, &length_x, &p, &s, new_cigar);			
+					++ length_m;
+				}
+				++ ref;
+				++ read;
+			}
+		}else if (op == 'I') {
+			read += length;
+			mismatch_length += length;
+			new_cigar = store_previous_m (0, &length_m, &length_x, &p, &s, new_cigar);			
+			new_cigar = add_cigar (new_cigar, &p, &s, length, 'I'); 
+		}else if (op == 'D') {
+			ref += length;
+			mismatch_length += length;
+			new_cigar = store_previous_m (0, &length_m, &length_x, &p, &s, new_cigar);			
+			new_cigar = add_cigar (new_cigar, &p, &s, length, 'D'); 
+		}
+	}
+	new_cigar = store_previous_m (0, &length_m, &length_x, &p, &s, new_cigar);
+	
+	length = readLen - read_end1 - 1;
+	if (length > 0) new_cigar = add_cigar(new_cigar, &p, &s, length, 'S');
+	
+	(*cigarLen) = p;	
+	free(*cigar);
+	(*cigar) = new_cigar;
+	return mismatch_length;
+}
 
-inline uint32_t cigar_int_to_len (uint32_t cigar_int)
-{
-	return cigar_int >> BAM_CIGAR_SHIFT;
-}*/
diff --git a/src/ssw.h b/src/ssw.h
index 685ecf3..43653e6 100644
--- a/src/ssw.h
+++ b/src/ssw.h
@@ -4,7 +4,7 @@
  *  Created by Mengyao Zhao on 6/22/10.
  *  Copyright 2010 Boston College. All rights reserved.
  *	Version 0.1.4
- *	Last revision by Mengyao Zhao on 02/11/16.
+ *	Last revision by Mengyao Zhao on 07/19/16.
  *
  */
 
@@ -22,7 +22,7 @@ extern "C" {
 
 #define MAPSTR "MIDNSHP=X"
 #ifndef BAM_CIGAR_SHIFT
-#define BAM_CIGAR_SHIFT 4
+#define BAM_CIGAR_SHIFT 4u
 #endif
 
 
@@ -129,37 +129,37 @@ s_align* ssw_align (const s_profile* prof,
 */
 void align_destroy (s_align* a);
 
+/*! @function:
+     1. Calculate the number of mismatches.
+     2. Modify the cigar string:
+         differentiate matches (=), mismatches(X), and softclip(S).
+	@param	ref_begin1	0-based best alignment beginning position on the reference sequence
+	@param	read_begin1	0-based best alignment beginning position on the read sequence
+	@param	read_end1	0-based best alignment ending position on the read sequence
+	@param	ref	pointer to the reference sequence
+	@param	read	pointer to the read sequence
+	@param	readLen	length of the read
+	@param	cigar	best alignment cigar; stored the same as that in BAM format, high 28 bits: length, low 4 bits: M/I/D (0/1/2)
+	@param	cigarLen	length of the cigar string
+ 	@return:
+     The number of mismatches.
+	 The cigar and cigarLen are modified.
+*/
+int32_t mark_mismatch (int32_t ref_begin1,
+					   int32_t read_begin1,
+					   int32_t read_end1,
+					   const char* ref,
+					   const char* read,
+					   int32_t readLen,
+					   uint32_t** cigar, 
+					   int32_t* cigarLen);
+
 /*!	@function		Produce CIGAR 32-bit unsigned integer from CIGAR operation and CIGAR length
 	@param	length		length of CIGAR
 	@param	op_letter	CIGAR operation character ('M', 'I', etc)
 	@return			32-bit unsigned integer, representing encoded CIGAR operation and length
 */
-static inline uint32_t to_cigar_int (uint32_t length, char op_letter)
-{
-	switch (op_letter) {
-		case 'M': /* alignment match (can be a sequence match or mismatch */
-		default:
-			return length << BAM_CIGAR_SHIFT;
-		case 'S': /* soft clipping (clipped sequences present in SEQ) */
-			return (length << BAM_CIGAR_SHIFT) | (4u);
-		case 'D': /* deletion from the reference */
-			return (length << BAM_CIGAR_SHIFT) | (2u);
-		case 'I': /* insertion to the reference */
-			return (length << BAM_CIGAR_SHIFT) | (1u);
-		case 'H': /* hard clipping (clipped sequences NOT present in SEQ) */
-			return (length << BAM_CIGAR_SHIFT) | (5u);
-		case 'N': /* skipped region from the reference */
-			return (length << BAM_CIGAR_SHIFT) | (3u);
-		case 'P': /* padding (silent deletion from padded reference) */
-			return (length << BAM_CIGAR_SHIFT) | (6u);
-		case '=': /* sequence match */
-			return (length << BAM_CIGAR_SHIFT) | (7u);
-		case 'X': /* sequence mismatch */
-			return (length << BAM_CIGAR_SHIFT) | (8u);
-	}
-	return (uint32_t)-1; // This never happens
-}
-
+uint32_t to_cigar_int (uint32_t length, char op_letter);
 
 /*!	@function		Extract CIGAR operation character from CIGAR 32-bit unsigned integer
 	@param	cigar_int	32-bit unsigned integer, representing encoded CIGAR operation and length
diff --git a/src/ssw_cpp.cpp b/src/ssw_cpp.cpp
index 7bd3beb..4e3df7d 100644
--- a/src/ssw_cpp.cpp
+++ b/src/ssw_cpp.cpp
@@ -117,7 +117,6 @@ void CleanPreviousMOperator(
 //     1. Calculate the number of mismatches.
 //     2. Modify the cigar string:
 //         differentiate matches (M) and mismatches(X).
-//         Note that SSW does not differentiate matches and mismatches.
 // @Return:
 //     The number of mismatches.
 int CalculateNumberMismatch(

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libssw.git



More information about the debian-med-commit mailing list