[med-svn] [libseqlib] 02/04: New upstream version 1.1.1+dfsg

Andreas Tille tille at debian.org
Fri Feb 3 21:14:20 UTC 2017


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository libseqlib.

commit ff988fa7c4ef40823240d9603b4d10be89c06993
Author: Andreas Tille <tille at debian.org>
Date:   Fri Feb 3 21:58:41 2017 +0100

    New upstream version 1.1.1+dfsg
---
 SeqLib/ssw.h     | 188 ------------
 SeqLib/ssw_cpp.h | 219 --------------
 src/ssw.c        | 884 -------------------------------------------------------
 src/ssw_cpp.cpp  | 477 ------------------------------
 4 files changed, 1768 deletions(-)

diff --git a/SeqLib/ssw.h b/SeqLib/ssw.h
deleted file mode 100644
index 685ecf3..0000000
--- a/SeqLib/ssw.h
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- *  ssw.h
- *
- *  Created by Mengyao Zhao on 6/22/10.
- *  Copyright 2010 Boston College. All rights reserved.
- *	Version 0.1.4
- *	Last revision by Mengyao Zhao on 02/11/16.
- *
- */
-
-#ifndef SSW_H
-#define SSW_H
-
-#include <stdio.h>
-#include <stdint.h>
-#include <string.h>
-#include <emmintrin.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif	// __cplusplus
-
-#define MAPSTR "MIDNSHP=X"
-#ifndef BAM_CIGAR_SHIFT
-#define BAM_CIGAR_SHIFT 4
-#endif
-
-
-/*!	@typedef	structure of the query profile	*/
-struct _profile;
-typedef struct _profile s_profile;
-
-/*!	@typedef	structure of the alignment result
-	@field	score1	the best alignment score
-	@field	score2	sub-optimal alignment score
-	@field	ref_begin1	0-based best alignment beginning position on reference;	ref_begin1 = -1 when the best alignment beginning
-						position is not available
-	@field	ref_end1	0-based best alignment ending position on reference
-	@field	read_begin1	0-based best alignment beginning position on read; read_begin1 = -1 when the best alignment beginning
-						position is not available
-	@field	read_end1	0-based best alignment ending position on read
-	@field	read_end2	0-based sub-optimal alignment ending position on read
-	@field	cigar	best alignment cigar; stored the same as that in BAM format, high 28 bits: length, low 4 bits: M/I/D (0/1/2);
-					cigar = 0 when the best alignment path is not available
-	@field	cigarLen	length of the cigar string; cigarLen = 0 when the best alignment path is not available
-*/
-typedef struct {
-	uint16_t score1;
-	uint16_t score2;
-	int32_t ref_begin1;
-	int32_t ref_end1;
-	int32_t	read_begin1;
-	int32_t read_end1;
-	int32_t ref_end2;
-	uint32_t* cigar;
-	int32_t cigarLen;
-} s_align;
-
-/*!	@function	Create the query profile using the query sequence.
-	@param	read	pointer to the query sequence; the query sequence needs to be numbers
-	@param	readLen	length of the query sequence
-	@param	mat	pointer to the substitution matrix; mat needs to be corresponding to the read sequence
-	@param	n	the square root of the number of elements in mat (mat has n*n elements)
-	@param	score_size	estimated Smith-Waterman score; if your estimated best alignment score is surely < 255 please set 0; if
-						your estimated best alignment score >= 255, please set 1; if you don't know, please set 2
-	@return	pointer to the query profile structure
-	@note	example for parameter read and mat:
-			If the query sequence is: ACGTATC, the sequence that read points to can be: 1234142
-			Then if the penalty for match is 2 and for mismatch is -2, the substitution matrix of parameter mat will be:
-			//A  C  G  T
-			  2 -2 -2 -2 //A
-			 -2  2 -2 -2 //C
-			 -2 -2  2 -2 //G
-			 -2 -2 -2  2 //T
-			mat is the pointer to the array {2, -2, -2, -2, -2, 2, -2, -2, -2, -2, 2, -2, -2, -2, -2, 2}
-*/
-s_profile* ssw_init (const int8_t* read, const int32_t readLen, const int8_t* mat, const int32_t n, const int8_t score_size);
-
-/*!	@function	Release the memory allocated by function ssw_init.
-	@param	p	pointer to the query profile structure
-*/
-void init_destroy (s_profile* p);
-
-// @function	ssw alignment.
-/*!	@function	Do Striped Smith-Waterman alignment.
-	@param	prof	pointer to the query profile structure
-	@param	ref	pointer to the target sequence; the target sequence needs to be numbers and corresponding to the mat parameter of
-				function ssw_init
-	@param	refLen	length of the target sequence
-	@param	weight_gapO	the absolute value of gap open penalty
-	@param	weight_gapE	the absolute value of gap extension penalty
-	@param	flag	bitwise FLAG; (from high to low) bit 5: when setted as 1, function ssw_align will return the best alignment
-					beginning position; bit 6: when setted as 1, if (ref_end1 - ref_begin1 < filterd && read_end1 - read_begin1
-					< filterd), (whatever bit 5 is setted) the function will return the best alignment beginning position and
-					cigar; bit 7: when setted as 1, if the best alignment score >= filters, (whatever bit 5 is setted) the function
-  					will return the best alignment beginning position and cigar; bit 8: when setted as 1, (whatever bit 5, 6 or 7 is
- 					setted) the function will always return the best alignment beginning position and cigar. When flag == 0, only
-					the optimal and sub-optimal scores and the optimal alignment ending position will be returned.
-	@param	filters	score filter: when bit 7 of flag is setted as 1 and bit 8 is setted as 0, filters will be used (Please check the
- 					decription of the flag parameter for detailed usage.)
-	@param	filterd	distance filter: when bit 6 of flag is setted as 1 and bit 8 is setted as 0, filterd will be used (Please check
-					the decription of the flag parameter for detailed usage.)
-	@param	maskLen	The distance between the optimal and suboptimal alignment ending position >= maskLen. We suggest to use
-					readLen/2, if you don't have special concerns. Note: maskLen has to be >= 15, otherwise this function will NOT
-					return the suboptimal alignment information. Detailed description of maskLen: After locating the optimal
-					alignment ending position, the suboptimal alignment score can be heuristically found by checking the second
-					largest score in the array that contains the maximal score of each column of the SW matrix. In order to avoid
-					picking the scores that belong to the alignments sharing the partial best alignment, SSW C library masks the
-					reference loci nearby (mask length = maskLen) the best alignment ending position and locates the second largest
-					score from the unmasked elements.
-	@return	pointer to the alignment result structure
-	@note	Whatever the parameter flag is setted, this function will at least return the optimal and sub-optimal alignment score,
-			and the optimal alignment ending positions on target and query sequences. If both bit 6 and 7 of the flag are setted
-			while bit 8 is not, the function will return cigar only when both criteria are fulfilled. All returned positions are
-			0-based coordinate.
-*/
-s_align* ssw_align (const s_profile* prof,
-					const int8_t* ref,
-					int32_t refLen,
-					const uint8_t weight_gapO,
-					const uint8_t weight_gapE,
-					const uint8_t flag,
-					const uint16_t filters,
-					const int32_t filterd,
-					const int32_t maskLen);
-
-/*!	@function	Release the memory allocated by function ssw_align.
-	@param	a	pointer to the alignment result structure
-*/
-void align_destroy (s_align* a);
-
-/*!	@function		Produce CIGAR 32-bit unsigned integer from CIGAR operation and CIGAR length
-	@param	length		length of CIGAR
-	@param	op_letter	CIGAR operation character ('M', 'I', etc)
-	@return			32-bit unsigned integer, representing encoded CIGAR operation and length
-*/
-static inline uint32_t to_cigar_int (uint32_t length, char op_letter)
-{
-	switch (op_letter) {
-		case 'M': /* alignment match (can be a sequence match or mismatch */
-		default:
-			return length << BAM_CIGAR_SHIFT;
-		case 'S': /* soft clipping (clipped sequences present in SEQ) */
-			return (length << BAM_CIGAR_SHIFT) | (4u);
-		case 'D': /* deletion from the reference */
-			return (length << BAM_CIGAR_SHIFT) | (2u);
-		case 'I': /* insertion to the reference */
-			return (length << BAM_CIGAR_SHIFT) | (1u);
-		case 'H': /* hard clipping (clipped sequences NOT present in SEQ) */
-			return (length << BAM_CIGAR_SHIFT) | (5u);
-		case 'N': /* skipped region from the reference */
-			return (length << BAM_CIGAR_SHIFT) | (3u);
-		case 'P': /* padding (silent deletion from padded reference) */
-			return (length << BAM_CIGAR_SHIFT) | (6u);
-		case '=': /* sequence match */
-			return (length << BAM_CIGAR_SHIFT) | (7u);
-		case 'X': /* sequence mismatch */
-			return (length << BAM_CIGAR_SHIFT) | (8u);
-	}
-	return (uint32_t)-1; // This never happens
-}
-
-
-/*!	@function		Extract CIGAR operation character from CIGAR 32-bit unsigned integer
-	@param	cigar_int	32-bit unsigned integer, representing encoded CIGAR operation and length
-	@return			CIGAR operation character ('M', 'I', etc)
-*/
-//char cigar_int_to_op (uint32_t cigar_int);
-static inline char cigar_int_to_op(uint32_t cigar_int) 
-{
-	return (cigar_int & 0xfU) > 8 ? 'M': MAPSTR[cigar_int & 0xfU];
-}
-
-
-/*!	@function		Extract length of a CIGAR operation from CIGAR 32-bit unsigned integer
-	@param	cigar_int	32-bit unsigned integer, representing encoded CIGAR operation and length
-	@return			length of CIGAR operation
-*/
-//uint32_t cigar_int_to_len (uint32_t cigar_int);
-static inline uint32_t cigar_int_to_len (uint32_t cigar_int)
-{
-	return cigar_int >> BAM_CIGAR_SHIFT;
-}
-#ifdef __cplusplus
-}
-#endif	// __cplusplus
-
-#endif	// SSW_H
diff --git a/SeqLib/ssw_cpp.h b/SeqLib/ssw_cpp.h
deleted file mode 100644
index cdcf717..0000000
--- a/SeqLib/ssw_cpp.h
+++ /dev/null
@@ -1,219 +0,0 @@
-#ifndef COMPLETE_STRIPED_SMITH_WATERMAN_CPP_H_
-#define COMPLETE_STRIPED_SMITH_WATERMAN_CPP_H_
-
-#include <stdint.h>
-#include <string>
-#include <vector>
-
-namespace StripedSmithWaterman {
-
-struct Alignment {
-  uint16_t sw_score;           // The best alignment score
-  uint16_t sw_score_next_best; // The next best alignment score
-  int32_t  ref_begin;          // Reference begin position of the best alignment
-  int32_t  ref_end;            // Reference end position of the best alignment
-  int32_t  query_begin;        // Query begin position of the best alignment
-  int32_t  query_end;          // Query end position of the best alignment
-  int32_t  ref_end_next_best;  // Reference end position of the next best alignment
-  int32_t  mismatches;         // Number of mismatches of the alignment
-  std::string cigar_string;    // Cigar string of the best alignment
-  std::vector<uint32_t> cigar; // Cigar stored in the BAM format
-                               //   high 28 bits: length
-			       //   low 4 bits: M/I/D/S/X (0/1/2/4/8);
-  void Clear() {
-    sw_score           = 0;
-    sw_score_next_best = 0;
-    ref_begin          = 0;
-    ref_end            = 0;
-    query_begin        = 0;
-    query_end          = 0;
-    ref_end_next_best  = 0;
-    mismatches         = 0;
-    cigar_string.clear();
-    cigar.clear();
-  };
-};
-
-struct Filter {
-  // NOTE: No matter the filter, those five fields of Alignment will be given anyway.
-  //       sw_score; sw_score_next_best; ref_end; query_end; ref_end_next_best.
-  // NOTE: Only need score of alignments, please set 'report_begin_position'
-  //       and 'report_cigar' false.
-
-  bool report_begin_position;    // Give ref_begin and query_begin.
-                                 //   If it is not set, ref_begin and query_begin are -1.
-  bool report_cigar;             // Give cigar_string and cigar.
-                                 //   report_begin_position is automatically TRUE.
-
-  // When *report_cigar* is true and alignment passes these two filters,
-  //   cigar_string and cigar will be given.
-  uint16_t score_filter;         // score >= score_filter
-  uint16_t distance_filter;      // ((ref_end - ref_begin) < distance_filter) &&
-                                 // ((query_end - read_begin) < distance_filter)
-
-  Filter()
-    : report_begin_position(true)
-    , report_cigar(true)
-    , score_filter(0)
-    , distance_filter(32767)
-  {};
-
-  Filter(const bool& pos, const bool& cigar, const uint16_t& score, const uint16_t& dis)
-    : report_begin_position(pos)
-    , report_cigar(cigar)
-    , score_filter(score)
-    , distance_filter(dis)
-    {};
-};
-
-class Aligner {
- public:
-  // =========
-  // @function Construct an Aligner on default values.
-  //             The function will build the {A.C,G,T,N} aligner.
-  //             If you target for other character aligners, then please
-  //             use the other constructor and pass the corresponding matrix in.
-  // =========
-  Aligner(void);
-
-  // =========
-  // @function Construct an Aligner by assigning scores.
-  //             The function will build the {A.C,G,T,N} aligner.
-  //             If you target for other character aligners, then please
-  //             use the other constructor and pass the corresponding matrix in.
-  // =========
-  Aligner(const uint8_t& match_score,
-          const uint8_t& mismatch_penalty,
-	  const uint8_t& gap_opening_penalty,
-	  const uint8_t& gap_extending_penalty);
-
-  // =========
-  // @function Construct an Aligner by the specific matrixs.
-  // =========
-  Aligner(const int8_t* score_matrix,
-          const int&    score_matrix_size,
-          const int8_t* translation_matrix,
-	  const int&    translation_matrix_size);
-
-  ~Aligner(void);
-
-  // =========
-  // @function Build the reference sequence and thus make
-  //             Align(const char* query, s_align* alignment) function;
-  //             otherwise the reference should be given when aligning.
-  //           [NOTICE] If there exists a sequence, that one will be deleted
-  //                    and replaced.
-  // @param    seq    The reference bases;
-  //                  [NOTICE] It is not necessary null terminated.
-  // @param    length The length of bases will be be built.
-  // @return   The length of the built bases.
-  // =========
-  int SetReferenceSequence(const char* seq, const int& length);
-
-  void CleanReferenceSequence(void);
-
-  // =========
-  // @function Set penalties for opening and extending gaps
-  //           [NOTICE] The defaults are 3 and 1 respectively.
-  // =========
-  void SetGapPenalty(const uint8_t& opening, const uint8_t& extending) {
-    gap_opening_penalty_ = opening;
-    gap_extending_penalty_ = extending;
-  };
-
-  // =========
-  // @function Align the query againt the reference that is set by
-  //             SetReferenceSequence.
-  // @param    query     The query sequence.
-  // @param    filter    The filter for the alignment.
-  // @param    alignment The container contains the result.
-  // @return   True: succeed; false: fail.
-  // =========
-  bool Align(const char* query, const Filter& filter, Alignment* alignment) const;
-
-  // =========
-  // @function Align the query againt the reference.
-  //           [NOTICE] The reference won't replace the reference
-  //                      set by SetReferenceSequence.
-  // @param    query     The query sequence.
-  // @param    ref       The reference sequence.
-  //                     [NOTICE] It is not necessary null terminated.
-  // @param    ref_len   The length of the reference sequence.
-  // @param    filter    The filter for the alignment.
-  // @param    alignment The container contains the result.
-  // @return   True: succeed; false: fail.
-  // =========
-  bool Align(const char* query, const char* ref, const int& ref_len,
-             const Filter& filter, Alignment* alignment) const;
-
-  // @function Clear up all containers and thus the aligner is disabled.
-  //             To rebuild the aligner please use Build functions.
-  void Clear(void);
-
-  // =========
-  // @function Rebuild the aligner's ability on default values.
-  //           [NOTICE] If the aligner is not cleaned, rebuilding will fail.
-  // @return   True: succeed; false: fail.
-  // =========
-  bool ReBuild(void);
-
-  // =========
-  // @function Rebuild the aligner's ability by the specific matrixs.
-  //           [NOTICE] If the aligner is not cleaned, rebuilding will fail.
-  // @return   True: succeed; false: fail.
-  // =========
-  bool ReBuild(
-          const uint8_t& match_score,
-          const uint8_t& mismatch_penalty,
-	  const uint8_t& gap_opening_penalty,
-	  const uint8_t& gap_extending_penalty);
-
-  // =========
-  // @function Construct an Aligner by the specific matrixs.
-  //           [NOTICE] If the aligner is not cleaned, rebuilding will fail.
-  // @return   True: succeed; false: fail.
-  // =========
-  bool ReBuild(
-          const int8_t* score_matrix,
-          const int&    score_matrix_size,
-          const int8_t* translation_matrix,
-	  const int&    translation_matrix_size);
-
- private:
-  int8_t* score_matrix_;
-  int     score_matrix_size_;
-  int8_t* translation_matrix_;
-
-  uint8_t match_score_;           // default: 2
-  uint8_t mismatch_penalty_;      // default: 2
-  uint8_t gap_opening_penalty_;   // default: 3
-  uint8_t gap_extending_penalty_; // default: 1
-
-  int8_t* translated_reference_;
-  int32_t reference_length_;
-
-  int TranslateBase(const char* bases, const int& length, int8_t* translated) const;
-  void SetAllDefault(void);
-  void BuildDefaultMatrix(void);
-  void ClearMatrices(void);
-
-  Aligner& operator= (const Aligner&);
-  Aligner (const Aligner&);
-}; // class Aligner
-
-
-// ================
-// inline functions
-// ================
-inline void Aligner::CleanReferenceSequence(void) {
-  if (reference_length_ == 0) return;
-
-  // delete the current buffer
-  if (reference_length_ > 1) delete [] translated_reference_;
-  else delete translated_reference_;
-
-  reference_length_ = 0;
-}
-} // namespace StripedSmithWaterman
-
-#endif // COMPLETE_STRIPED_SMITH_WATERMAN_CPP_H_
diff --git a/src/ssw.c b/src/ssw.c
deleted file mode 100644
index 213d486..0000000
--- a/src/ssw.c
+++ /dev/null
@@ -1,884 +0,0 @@
-/* The MIT License
-
-   Copyright (c) 2012-1015 Boston College.
-
-   Permission is hereby granted, free of charge, to any person obtaining
-   a copy of this software and associated documentation files (the
-   "Software"), to deal in the Software without restriction, including
-   without limitation the rights to use, copy, modify, merge, publish,
-   distribute, sublicense, and/or sell copies of the Software, and to
-   permit persons to whom the Software is furnished to do so, subject to
-   the following conditions:
-
-   The above copyright notice and this permission notice shall be
-   included in all copies or substantial portions of the Software.
-
-   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
-   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
-   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-   SOFTWARE.
-*/
-
-/* Contact: Mengyao Zhao <zhangmp at bc.edu> */
-
-/*
- *  ssw.c
- *
- *  Created by Mengyao Zhao on 6/22/10.
- *  Copyright 2010 Boston College. All rights reserved.
- *	Version 0.1.4
- *	Last revision by Mengyao Zhao on 02/11/16.
- *
- */
-
-#include <emmintrin.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <math.h>
-#include "SeqLib/ssw.h"
-
-#ifdef __GNUC__
-#define LIKELY(x) __builtin_expect((x),1)
-#define UNLIKELY(x) __builtin_expect((x),0)
-#else
-#define LIKELY(x) (x)
-#define UNLIKELY(x) (x)
-#endif
-
-/* Convert the coordinate in the scoring matrix into the coordinate in one line of the band. */
-#define set_u(u, w, i, j) { int x=(i)-(w); x=x>0?x:0; (u)=(j)-x+1; }
-
-/* Convert the coordinate in the direction matrix into the coordinate in one line of the band. */
-#define set_d(u, w, i, j, p) { int x=(i)-(w); x=x>0?x:0; x=(j)-x; (u)=x*3+p; }
-
-/*! @function
-  @abstract  Round an integer to the next closest power-2 integer.
-  @param  x  integer to be rounded (in place)
-  @discussion x will be modified.
- */
-#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
-
-typedef struct {
-	uint16_t score;
-	int32_t ref;	 //0-based position
-	int32_t read;    //alignment ending position on read, 0-based
-} alignment_end;
-
-typedef struct {
-	uint32_t* seq;
-	int32_t length;
-} cigar;
-
-struct _profile{
-	__m128i* profile_byte;	// 0: none
-	__m128i* profile_word;	// 0: none
-	const int8_t* read;
-	const int8_t* mat;
-	int32_t readLen;
-	int32_t n;
-	uint8_t bias;
-};
-
-/* Generate query profile rearrange query sequence & calculate the weight of match/mismatch. */
-static __m128i* qP_byte (const int8_t* read_num,
-				  const int8_t* mat,
-				  const int32_t readLen,
-				  const int32_t n,	/* the edge length of the squre matrix mat */
-				  uint8_t bias) {
-
-	int32_t segLen = (readLen + 15) / 16; /* Split the 128 bit register into 16 pieces.
-								     Each piece is 8 bit. Split the read into 16 segments.
-								     Calculat 16 segments in parallel.
-								   */
-	__m128i* vProfile = (__m128i*)malloc(n * segLen * sizeof(__m128i));
-	int8_t* t = (int8_t*)vProfile;
-	int32_t nt, i, j, segNum;
-
-	/* Generate query profile rearrange query sequence & calculate the weight of match/mismatch */
-	for (nt = 0; LIKELY(nt < n); nt ++) {
-		for (i = 0; i < segLen; i ++) {
-			j = i;
-			for (segNum = 0; LIKELY(segNum < 16) ; segNum ++) {
-				*t++ = j>= readLen ? bias : mat[nt * n + read_num[j]] + bias;
-				j += segLen;
-			}
-		}
-	}
-	return vProfile;
-}
-
-/* Striped Smith-Waterman
-   Record the highest score of each reference position.
-   Return the alignment score and ending position of the best alignment, 2nd best alignment, etc.
-   Gap begin and gap extension are different.
-   wight_match > 0, all other weights < 0.
-   The returned positions are 0-based.
- */
-static alignment_end* sw_sse2_byte (const int8_t* ref,
-							 int8_t ref_dir,	// 0: forward ref; 1: reverse ref
-							 int32_t refLen,
-							 int32_t readLen,
-							 const uint8_t weight_gapO, /* will be used as - */
-							 const uint8_t weight_gapE, /* will be used as - */
-							 const __m128i* vProfile,
-							 uint8_t terminate,	/* the best alignment score: used to terminate
-												   the matrix calculation when locating the
-												   alignment beginning point. If this score
-												   is set to 0, it will not be used */
-	 						 uint8_t bias,  /* Shift 0 point to a positive value. */
-							 int32_t maskLen) {
-
-#define max16(m, vm) (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 8)); \
-					  (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 4)); \
-					  (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 2)); \
-					  (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 1)); \
-					  (m) = _mm_extract_epi16((vm), 0)
-
-	uint8_t max = 0;		                     /* the max alignment score */
-	int32_t end_read = readLen - 1;
-	int32_t end_ref = -1; /* 0_based best alignment ending point; Initialized as isn't aligned -1. */
-	int32_t segLen = (readLen + 15) / 16; /* number of segment */
-
-	/* array to record the largest score of each reference position */
-	uint8_t* maxColumn = (uint8_t*) calloc(refLen, 1);
-
-	/* array to record the alignment read ending position of the largest score of each reference position */
-	int32_t* end_read_column = (int32_t*) calloc(refLen, sizeof(int32_t));
-
-	/* Define 16 byte 0 vector. */
-	__m128i vZero = _mm_set1_epi32(0);
-
-	__m128i* pvHStore = (__m128i*) calloc(segLen, sizeof(__m128i));
-	__m128i* pvHLoad = (__m128i*) calloc(segLen, sizeof(__m128i));
-	__m128i* pvE = (__m128i*) calloc(segLen, sizeof(__m128i));
-	__m128i* pvHmax = (__m128i*) calloc(segLen, sizeof(__m128i));
-
-	int32_t i, j;
-	/* 16 byte insertion begin vector */
-	__m128i vGapO = _mm_set1_epi8(weight_gapO);
-
-	/* 16 byte insertion extension vector */
-	__m128i vGapE = _mm_set1_epi8(weight_gapE);
-
-	/* 16 byte bias vector */
-	__m128i vBias = _mm_set1_epi8(bias);
-
-	__m128i vMaxScore = vZero; /* Trace the highest score of the whole SW matrix. */
-	__m128i vMaxMark = vZero; /* Trace the highest score till the previous column. */
-	__m128i vTemp;
-	int32_t edge, begin = 0, end = refLen, step = 1;
-
-	/* outer loop to process the reference sequence */
-	if (ref_dir == 1) {
-		begin = refLen - 1;
-		end = -1;
-		step = -1;
-	}
-	for (i = begin; LIKELY(i != end); i += step) {
-//fprintf(stderr, "%d", ref[i]);
-		int32_t cmp;
-		__m128i e, vF = vZero, vMaxColumn = vZero; /* Initialize F value to 0.
-							   Any errors to vH values will be corrected in the Lazy_F loop.
-							 */
-//		max16(maxColumn[i], vMaxColumn);
-//		fprintf(stderr, "middle[%d]: %d\n", i, maxColumn[i]);
-
-		__m128i vH = pvHStore[segLen - 1];
-		vH = _mm_slli_si128 (vH, 1); /* Shift the 128-bit value in vH left by 1 byte. */
-		const __m128i* vP = vProfile + ref[i] * segLen; /* Right part of the vProfile */
-
-		/* Swap the 2 H buffers. */
-		__m128i* pv = pvHLoad;
-		pvHLoad = pvHStore;
-		pvHStore = pv;
-
-		/* inner loop to process the query sequence */
-		for (j = 0; LIKELY(j < segLen); ++j) {
-			vH = _mm_adds_epu8(vH, _mm_load_si128(vP + j));
-			vH = _mm_subs_epu8(vH, vBias); /* vH will be always > 0 */
-	//	max16(maxColumn[i], vH);
-	//	fprintf(stderr, "H[%d]: %d\n", i, maxColumn[i]);
-//	int8_t* t;
-//	int32_t ti;
-//for (t = (int8_t*)&vH, ti = 0; ti < 16; ++ti) fprintf(stderr, "%d\t", *t++);
-
-			/* Get max from vH, vE and vF. */
-			e = _mm_load_si128(pvE + j);
-			vH = _mm_max_epu8(vH, e);
-			vH = _mm_max_epu8(vH, vF);
-			vMaxColumn = _mm_max_epu8(vMaxColumn, vH);
-
-	//	max16(maxColumn[i], vMaxColumn);
-	//	fprintf(stderr, "middle[%d]: %d\n", i, maxColumn[i]);
-//	for (t = (int8_t*)&vMaxColumn, ti = 0; ti < 16; ++ti) fprintf(stderr, "%d\t", *t++);
-
-			/* Save vH values. */
-			_mm_store_si128(pvHStore + j, vH);
-
-			/* Update vE value. */
-			vH = _mm_subs_epu8(vH, vGapO); /* saturation arithmetic, result >= 0 */
-			e = _mm_subs_epu8(e, vGapE);
-			e = _mm_max_epu8(e, vH);
-			_mm_store_si128(pvE + j, e);
-
-			/* Update vF value. */
-			vF = _mm_subs_epu8(vF, vGapE);
-			vF = _mm_max_epu8(vF, vH);
-
-			/* Load the next vH. */
-			vH = _mm_load_si128(pvHLoad + j);
-		}
-
-		/* Lazy_F loop: has been revised to disallow adjecent insertion and then deletion, so don't update E(i, j), learn from SWPS3 */
-        /* reset pointers to the start of the saved data */
-        j = 0;
-        vH = _mm_load_si128 (pvHStore + j);
-
-        /*  the computed vF value is for the given column.  since */
-        /*  we are at the end, we need to shift the vF value over */
-        /*  to the next column. */
-        vF = _mm_slli_si128 (vF, 1);
-        vTemp = _mm_subs_epu8 (vH, vGapO);
-		vTemp = _mm_subs_epu8 (vF, vTemp);
-		vTemp = _mm_cmpeq_epi8 (vTemp, vZero);
-		cmp  = _mm_movemask_epi8 (vTemp);
-
-        while (cmp != 0xffff)
-        {
-            vH = _mm_max_epu8 (vH, vF);
-			vMaxColumn = _mm_max_epu8(vMaxColumn, vH);
-            _mm_store_si128 (pvHStore + j, vH);
-            vF = _mm_subs_epu8 (vF, vGapE);
-            j++;
-            if (j >= segLen)
-            {
-                j = 0;
-                vF = _mm_slli_si128 (vF, 1);
-            }
-            vH = _mm_load_si128 (pvHStore + j);
-
-            vTemp = _mm_subs_epu8 (vH, vGapO);
-            vTemp = _mm_subs_epu8 (vF, vTemp);
-            vTemp = _mm_cmpeq_epi8 (vTemp, vZero);
-            cmp  = _mm_movemask_epi8 (vTemp);
-        }
-
-		vMaxScore = _mm_max_epu8(vMaxScore, vMaxColumn);
-		vTemp = _mm_cmpeq_epi8(vMaxMark, vMaxScore);
-		cmp = _mm_movemask_epi8(vTemp);
-		if (cmp != 0xffff) {
-			uint8_t temp;
-			vMaxMark = vMaxScore;
-			max16(temp, vMaxScore);
-			vMaxScore = vMaxMark;
-
-			if (LIKELY(temp > max)) {
-				max = temp;
-				if (max + bias >= 255) break;	//overflow
-				end_ref = i;
-
-				/* Store the column with the highest alignment score in order to trace the alignment ending position on read. */
-				for (j = 0; LIKELY(j < segLen); ++j) pvHmax[j] = pvHStore[j];
-			}
-		}
-
-		/* Record the max score of current column. */
-		max16(maxColumn[i], vMaxColumn);
-	//	fprintf(stderr, "maxColumn[%d]: %d\n", i, maxColumn[i]);
-		if (maxColumn[i] == terminate) break;
-	}
-
-	/* Trace the alignment ending position on read. */
-	uint8_t *t = (uint8_t*)pvHmax;
-	int32_t column_len = segLen * 16;
-	for (i = 0; LIKELY(i < column_len); ++i, ++t) {
-		int32_t temp;
-		if (*t == max) {
-			temp = i / 16 + i % 16 * segLen;
-			if (temp < end_read) end_read = temp;
-		}
-	}
-
-	free(pvHmax);
-	free(pvE);
-	free(pvHLoad);
-	free(pvHStore);
-
-	/* Find the most possible 2nd best alignment. */
-	alignment_end* bests = (alignment_end*) calloc(2, sizeof(alignment_end));
-	bests[0].score = max + bias >= 255 ? 255 : max;
-	bests[0].ref = end_ref;
-	bests[0].read = end_read;
-
-	bests[1].score = 0;
-	bests[1].ref = 0;
-	bests[1].read = 0;
-
-	edge = (end_ref - maskLen) > 0 ? (end_ref - maskLen) : 0;
-	for (i = 0; i < edge; i ++) {
-//			fprintf (stderr, "maxColumn[%d]: %d\n", i, maxColumn[i]);
-		if (maxColumn[i] > bests[1].score) {
-			bests[1].score = maxColumn[i];
-			bests[1].ref = i;
-		}
-	}
-	edge = (end_ref + maskLen) > refLen ? refLen : (end_ref + maskLen);
-	for (i = edge + 1; i < refLen; i ++) {
-//			fprintf (stderr, "refLen: %d\tmaxColumn[%d]: %d\n", refLen, i, maxColumn[i]);
-		if (maxColumn[i] > bests[1].score) {
-			bests[1].score = maxColumn[i];
-			bests[1].ref = i;
-		}
-	}
-
-	free(maxColumn);
-	free(end_read_column);
-	return bests;
-}
-
-static __m128i* qP_word (const int8_t* read_num,
-				  const int8_t* mat,
-				  const int32_t readLen,
-				  const int32_t n) {
-
-	int32_t segLen = (readLen + 7) / 8;
-	__m128i* vProfile = (__m128i*)malloc(n * segLen * sizeof(__m128i));
-	int16_t* t = (int16_t*)vProfile;
-	int32_t nt, i, j;
-	int32_t segNum;
-
-	/* Generate query profile rearrange query sequence & calculate the weight of match/mismatch */
-	for (nt = 0; LIKELY(nt < n); nt ++) {
-		for (i = 0; i < segLen; i ++) {
-			j = i;
-			for (segNum = 0; LIKELY(segNum < 8) ; segNum ++) {
-				*t++ = j>= readLen ? 0 : mat[nt * n + read_num[j]];
-				j += segLen;
-			}
-		}
-	}
-	return vProfile;
-}
-
-static alignment_end* sw_sse2_word (const int8_t* ref,
-							 int8_t ref_dir,	// 0: forward ref; 1: reverse ref
-							 int32_t refLen,
-							 int32_t readLen,
-							 const uint8_t weight_gapO, /* will be used as - */
-							 const uint8_t weight_gapE, /* will be used as - */
-							 const __m128i* vProfile,
-							 uint16_t terminate,
-							 int32_t maskLen) {
-
-#define max8(m, vm) (vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 8)); \
-					(vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 4)); \
-					(vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 2)); \
-					(m) = _mm_extract_epi16((vm), 0)
-
-	uint16_t max = 0;		                     /* the max alignment score */
-	int32_t end_read = readLen - 1;
-	int32_t end_ref = 0; /* 1_based best alignment ending point; Initialized as isn't aligned - 0. */
-	int32_t segLen = (readLen + 7) / 8; /* number of segment */
-
-	/* array to record the largest score of each reference position */
-	uint16_t* maxColumn = (uint16_t*) calloc(refLen, 2);
-
-	/* array to record the alignment read ending position of the largest score of each reference position */
-	int32_t* end_read_column = (int32_t*) calloc(refLen, sizeof(int32_t));
-
-	/* Define 16 byte 0 vector. */
-	__m128i vZero = _mm_set1_epi32(0);
-
-	__m128i* pvHStore = (__m128i*) calloc(segLen, sizeof(__m128i));
-	__m128i* pvHLoad = (__m128i*) calloc(segLen, sizeof(__m128i));
-	__m128i* pvE = (__m128i*) calloc(segLen, sizeof(__m128i));
-	__m128i* pvHmax = (__m128i*) calloc(segLen, sizeof(__m128i));
-
-	int32_t i, j, k;
-	/* 16 byte insertion begin vector */
-	__m128i vGapO = _mm_set1_epi16(weight_gapO);
-
-	/* 16 byte insertion extension vector */
-	__m128i vGapE = _mm_set1_epi16(weight_gapE);
-
-	__m128i vMaxScore = vZero; /* Trace the highest score of the whole SW matrix. */
-	__m128i vMaxMark = vZero; /* Trace the highest score till the previous column. */
-	__m128i vTemp;
-	int32_t edge, begin = 0, end = refLen, step = 1;
-
-	/* outer loop to process the reference sequence */
-	if (ref_dir == 1) {
-		begin = refLen - 1;
-		end = -1;
-		step = -1;
-	}
-	for (i = begin; LIKELY(i != end); i += step) {
-		int32_t cmp;
-		__m128i e, vF = vZero; /* Initialize F value to 0.
-							   Any errors to vH values will be corrected in the Lazy_F loop.
-							 */
-		__m128i vH = pvHStore[segLen - 1];
-		vH = _mm_slli_si128 (vH, 2); /* Shift the 128-bit value in vH left by 2 byte. */
-
-		/* Swap the 2 H buffers. */
-		__m128i* pv = pvHLoad;
-
-		__m128i vMaxColumn = vZero; /* vMaxColumn is used to record the max values of column i. */
-
-		const __m128i* vP = vProfile + ref[i] * segLen; /* Right part of the vProfile */
-		pvHLoad = pvHStore;
-		pvHStore = pv;
-
-		/* inner loop to process the query sequence */
-		for (j = 0; LIKELY(j < segLen); j ++) {
-			vH = _mm_adds_epi16(vH, _mm_load_si128(vP + j));
-
-			/* Get max from vH, vE and vF. */
-			e = _mm_load_si128(pvE + j);
-			vH = _mm_max_epi16(vH, e);
-			vH = _mm_max_epi16(vH, vF);
-			vMaxColumn = _mm_max_epi16(vMaxColumn, vH);
-
-			/* Save vH values. */
-			_mm_store_si128(pvHStore + j, vH);
-
-			/* Update vE value. */
-			vH = _mm_subs_epu16(vH, vGapO); /* saturation arithmetic, result >= 0 */
-			e = _mm_subs_epu16(e, vGapE);
-			e = _mm_max_epi16(e, vH);
-			_mm_store_si128(pvE + j, e);
-
-			/* Update vF value. */
-			vF = _mm_subs_epu16(vF, vGapE);
-			vF = _mm_max_epi16(vF, vH);
-
-			/* Load the next vH. */
-			vH = _mm_load_si128(pvHLoad + j);
-		}
-
-		/* Lazy_F loop: has been revised to disallow adjecent insertion and then deletion, so don't update E(i, j), learn from SWPS3 */
-		for (k = 0; LIKELY(k < 8); ++k) {
-			vF = _mm_slli_si128 (vF, 2);
-			for (j = 0; LIKELY(j < segLen); ++j) {
-				vH = _mm_load_si128(pvHStore + j);
-				vH = _mm_max_epi16(vH, vF);
-				vMaxColumn = _mm_max_epi16(vMaxColumn, vH); //newly added line
-				_mm_store_si128(pvHStore + j, vH);
-				vH = _mm_subs_epu16(vH, vGapO);
-				vF = _mm_subs_epu16(vF, vGapE);
-				if (UNLIKELY(! _mm_movemask_epi8(_mm_cmpgt_epi16(vF, vH)))) goto end;
-			}
-		}
-
-end:
-		vMaxScore = _mm_max_epi16(vMaxScore, vMaxColumn);
-		vTemp = _mm_cmpeq_epi16(vMaxMark, vMaxScore);
-		cmp = _mm_movemask_epi8(vTemp);
-		if (cmp != 0xffff) {
-			uint16_t temp;
-			vMaxMark = vMaxScore;
-			max8(temp, vMaxScore);
-			vMaxScore = vMaxMark;
-
-			if (LIKELY(temp > max)) {
-				max = temp;
-				end_ref = i;
-				for (j = 0; LIKELY(j < segLen); ++j) pvHmax[j] = pvHStore[j];
-			}
-		}
-
-		/* Record the max score of current column. */
-		max8(maxColumn[i], vMaxColumn);
-		if (maxColumn[i] == terminate) break;
-	}
-
-	/* Trace the alignment ending position on read. */
-	uint16_t *t = (uint16_t*)pvHmax;
-	int32_t column_len = segLen * 8;
-	for (i = 0; LIKELY(i < column_len); ++i, ++t) {
-		int32_t temp;
-		if (*t == max) {
-			temp = i / 8 + i % 8 * segLen;
-			if (temp < end_read) end_read = temp;
-		}
-	}
-
-	free(pvHmax);
-	free(pvE);
-	free(pvHLoad);
-	free(pvHStore);
-
-	/* Find the most possible 2nd best alignment. */
-	alignment_end* bests = (alignment_end*) calloc(2, sizeof(alignment_end));
-	bests[0].score = max;
-	bests[0].ref = end_ref;
-	bests[0].read = end_read;
-
-	bests[1].score = 0;
-	bests[1].ref = 0;
-	bests[1].read = 0;
-
-	edge = (end_ref - maskLen) > 0 ? (end_ref - maskLen) : 0;
-	for (i = 0; i < edge; i ++) {
-		if (maxColumn[i] > bests[1].score) {
-			bests[1].score = maxColumn[i];
-			bests[1].ref = i;
-		}
-	}
-	edge = (end_ref + maskLen) > refLen ? refLen : (end_ref + maskLen);
-	for (i = edge; i < refLen; i ++) {
-		if (maxColumn[i] > bests[1].score) {
-			bests[1].score = maxColumn[i];
-			bests[1].ref = i;
-		}
-	}
-
-	free(maxColumn);
-	free(end_read_column);
-	return bests;
-}
-
-static cigar* banded_sw (const int8_t* ref,
-				 const int8_t* read,
-				 int32_t refLen,
-				 int32_t readLen,
-				 int32_t score,
-				 const uint32_t weight_gapO,  /* will be used as - */
-				 const uint32_t weight_gapE,  /* will be used as - */
-				 int32_t band_width,
-				 const int8_t* mat,	/* pointer to the weight matrix */
-				 int32_t n) {
-
-	uint32_t *c = (uint32_t*)malloc(16 * sizeof(uint32_t)), *c1;
-	int32_t i, j, e, f, temp1, temp2, s = 16, s1 = 8, l, max = 0;
-	int64_t s2 = 1024;
-	char op, prev_op;
-	int32_t width, width_d, *h_b, *e_b, *h_c;
-	int8_t *direction, *direction_line;
-	cigar* result = (cigar*)malloc(sizeof(cigar));
-	h_b = (int32_t*)malloc(s1 * sizeof(int32_t));
-	e_b = (int32_t*)malloc(s1 * sizeof(int32_t));
-	h_c = (int32_t*)malloc(s1 * sizeof(int32_t));
-	direction = (int8_t*)malloc(s2 * sizeof(int8_t));
-
-	do {
-		width = band_width * 2 + 3, width_d = band_width * 2 + 1;
-		while (width >= s1) {
-			++s1;
-			kroundup32(s1);
-			h_b = (int32_t*)realloc(h_b, s1 * sizeof(int32_t));
-			e_b = (int32_t*)realloc(e_b, s1 * sizeof(int32_t));
-			h_c = (int32_t*)realloc(h_c, s1 * sizeof(int32_t));
-		}
-		while (width_d * readLen * 3 >= s2) {
-			++s2;
-			kroundup32(s2);
-			if (s2 < 0) {
-				fprintf(stderr, "Alignment score and position are not consensus.\n");
-				exit(1);
-			}
-			direction = (int8_t*)realloc(direction, s2 * sizeof(int8_t));
-		}
-		direction_line = direction;
-		for (j = 1; LIKELY(j < width - 1); j ++) h_b[j] = 0;
-		for (i = 0; LIKELY(i < readLen); i ++) {
-			int32_t beg = 0, end = refLen - 1, u = 0, edge;
-			j = i - band_width;	beg = beg > j ? beg : j; // band start
-			j = i + band_width; end = end < j ? end : j; // band end
-			edge = end + 1 < width - 1 ? end + 1 : width - 1;
-			f = h_b[0] = e_b[0] = h_b[edge] = e_b[edge] = h_c[0] = 0;
-			direction_line = direction + width_d * i * 3;
-
-			for (j = beg; LIKELY(j <= end); j ++) {
-				int32_t b, e1, f1, d, de, df, dh;
-				set_u(u, band_width, i, j);	set_u(e, band_width, i - 1, j);
-				set_u(b, band_width, i, j - 1); set_u(d, band_width, i - 1, j - 1);
-				set_d(de, band_width, i, j, 0);
-				set_d(df, band_width, i, j, 1);
-				set_d(dh, band_width, i, j, 2);
-
-				temp1 = i == 0 ? -weight_gapO : h_b[e] - weight_gapO;
-				temp2 = i == 0 ? -weight_gapE : e_b[e] - weight_gapE;
-				e_b[u] = temp1 > temp2 ? temp1 : temp2;
-				//fprintf(stderr, "de: %d\twidth_d: %d\treadLen: %d\ts2:%lu\n", de, width_d, readLen, s2);
-				direction_line[de] = temp1 > temp2 ? 3 : 2;
-
-				temp1 = h_c[b] - weight_gapO;
-				temp2 = f - weight_gapE;
-				f = temp1 > temp2 ? temp1 : temp2;
-				direction_line[df] = temp1 > temp2 ? 5 : 4;
-
-				e1 = e_b[u] > 0 ? e_b[u] : 0;
-				f1 = f > 0 ? f : 0;
-				temp1 = e1 > f1 ? e1 : f1;
-				temp2 = h_b[d] + mat[ref[j] * n + read[i]];
-				h_c[u] = temp1 > temp2 ? temp1 : temp2;
-
-				if (h_c[u] > max) max = h_c[u];
-
-				if (temp1 <= temp2) direction_line[dh] = 1;
-				else direction_line[dh] = e1 > f1 ? direction_line[de] : direction_line[df];
-			}
-			for (j = 1; j <= u; j ++) h_b[j] = h_c[j];
-		}
-		band_width *= 2;
-	} while (LIKELY(max < score));
-	band_width /= 2;
-
-	// trace back
-	i = readLen - 1;
-	j = refLen - 1;
-	e = 0;	// Count the number of M, D or I.
-	l = 0;	// record length of current cigar
-	op = prev_op = 'M';
-	temp2 = 2;	// h
-	while (LIKELY(i > 0)) {
-		set_d(temp1, band_width, i, j, temp2);
-		switch (direction_line[temp1]) {
-			case 1:
-				--i;
-				--j;
-				temp2 = 2;
-				direction_line -= width_d * 3;
-				op = 'M';
-				break;
-			case 2:
-			 	--i;
-				temp2 = 0;	// e
-				direction_line -= width_d * 3;
-				op = 'I';
-				break;
-			case 3:
-				--i;
-				temp2 = 2;
-				direction_line -= width_d * 3;
-				op = 'I';
-				break;
-			case 4:
-				--j;
-				temp2 = 1;
-				op = 'D';
-				break;
-			case 5:
-				--j;
-				temp2 = 2;
-				op = 'D';
-				break;
-			default:
-				fprintf(stderr, "Trace back error: %d.\n", direction_line[temp1 - 1]);
-				free(direction);
-				free(h_c);
-				free(e_b);
-				free(h_b);
-				free(c);
-				free(result);
-				return 0;
-		}
-		if (op == prev_op) ++e;
-		else {
-			++l;
-			while (l >= s) {
-				++s;
-				kroundup32(s);
-				c = (uint32_t*)realloc(c, s * sizeof(uint32_t));
-			}
-			c[l - 1] = to_cigar_int(e, prev_op);
-			prev_op = op;
-			e = 1;
-		}
-	}
-	if (op == 'M') {
-		++l;
-		while (l >= s) {
-			++s;
-			kroundup32(s);
-			c = (uint32_t*)realloc(c, s * sizeof(uint32_t));
-		}
-		c[l - 1] = to_cigar_int(e + 1, op);
-	}else {
-		l += 2;
-		while (l >= s) {
-			++s;
-			kroundup32(s);
-			c = (uint32_t*)realloc(c, s * sizeof(uint32_t));
-		}
-		c[l - 2] = to_cigar_int(e, op);
-		c[l - 1] = to_cigar_int(1, 'M');
-	}
-
-	// reverse cigar
-	c1 = (uint32_t*)malloc(l * sizeof(uint32_t));
-	s = 0;
-	e = l - 1;
-	while (LIKELY(s <= e)) {
-		c1[s] = c[e];
-		c1[e] = c[s];
-		++ s;
-		-- e;
-	}
-	result->seq = c1;
-	result->length = l;
-
-	free(direction);
-	free(h_c);
-	free(e_b);
-	free(h_b);
-	free(c);
-	return result;
-}
-
-static int8_t* seq_reverse(const int8_t* seq, int32_t end)	/* end is 0-based alignment ending position */
-{
-	int8_t* reverse = (int8_t*)calloc(end + 1, sizeof(int8_t));
-	int32_t start = 0;
-	while (LIKELY(start <= end)) {
-		reverse[start] = seq[end];
-		reverse[end] = seq[start];
-		++ start;
-		-- end;
-	}
-	return reverse;
-}
-
-s_profile* ssw_init (const int8_t* read, const int32_t readLen, const int8_t* mat, const int32_t n, const int8_t score_size) {
-	s_profile* p = (s_profile*)calloc(1, sizeof(struct _profile));
-	p->profile_byte = 0;
-	p->profile_word = 0;
-	p->bias = 0;
-
-	if (score_size == 0 || score_size == 2) {
-		/* Find the bias to use in the substitution matrix */
-		int32_t bias = 0, i;
-		for (i = 0; i < n*n; i++) if (mat[i] < bias) bias = mat[i];
-		bias = abs(bias);
-
-		p->bias = bias;
-		p->profile_byte = qP_byte (read, mat, readLen, n, bias);
-	}
-	if (score_size == 1 || score_size == 2) p->profile_word = qP_word (read, mat, readLen, n);
-	p->read = read;
-	p->mat = mat;
-	p->readLen = readLen;
-	p->n = n;
-	return p;
-}
-
-void init_destroy (s_profile* p) {
-	free(p->profile_byte);
-	free(p->profile_word);
-	free(p);
-}
-
-s_align* ssw_align (const s_profile* prof,
-					const int8_t* ref,
-				  	int32_t refLen,
-				  	const uint8_t weight_gapO,
-				  	const uint8_t weight_gapE,
-					const uint8_t flag,	//  (from high to low) bit 5: return the best alignment beginning position; 6: if (ref_end1 - ref_begin1 <= filterd) && (read_end1 - read_begin1 <= filterd), return cigar; 7: if max score >= filters, return cigar; 8: always return cigar; if 6 & 7 are both setted, only return cigar when both filter fulfilled
-					const uint16_t filters,
-					const int32_t filterd,
-					const int32_t maskLen) {
-
-	alignment_end* bests = 0, *bests_reverse = 0;
-	__m128i* vP = 0;
-	int32_t word = 0, band_width = 0, readLen = prof->readLen;
-	int8_t* read_reverse = 0;
-	cigar* path;
-	s_align* r = (s_align*)calloc(1, sizeof(s_align));
-	r->ref_begin1 = -1;
-	r->read_begin1 = -1;
-	r->cigar = 0;
-	r->cigarLen = 0;
-	if (maskLen < 15) {
-		fprintf(stderr, "When maskLen < 15, the function ssw_align doesn't return 2nd best alignment information.\n");
-	}
-
-	// Find the alignment scores and ending positions
-	if (prof->profile_byte) {
-		bests = sw_sse2_byte(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_byte, -1, prof->bias, maskLen);
-		if (prof->profile_word && bests[0].score == 255) {
-			free(bests);
-			bests = sw_sse2_word(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_word, -1, maskLen);
-			word = 1;
-		} else if (bests[0].score == 255) {
-			fprintf(stderr, "Please set 2 to the score_size parameter of the function ssw_init, otherwise the alignment results will be incorrect.\n");
-			free(r);
-			return NULL;
-		}
-	}else if (prof->profile_word) {
-		bests = sw_sse2_word(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_word, -1, maskLen);
-		word = 1;
-	}else {
-		fprintf(stderr, "Please call the function ssw_init before ssw_align.\n");
-		free(r);
-		return NULL;
-	}
-	r->score1 = bests[0].score;
-	r->ref_end1 = bests[0].ref;
-//fprintf(stderr, "0based ref_end: %d\n", r->ref_end1);
-	r->read_end1 = bests[0].read;
-	if (maskLen >= 15) {
-		r->score2 = bests[1].score;
-		r->ref_end2 = bests[1].ref;
-	} else {
-		r->score2 = 0;
-		r->ref_end2 = -1;
-	}
-	free(bests);
-	if (flag == 0 || (flag == 2 && r->score1 < filters)) goto end;
-
-	// Find the beginning position of the best alignment.
-	read_reverse = seq_reverse(prof->read, r->read_end1);
-	if (word == 0) {
-		vP = qP_byte(read_reverse, prof->mat, r->read_end1 + 1, prof->n, prof->bias);
-		bests_reverse = sw_sse2_byte(ref, 1, r->ref_end1 + 1, r->read_end1 + 1, weight_gapO, weight_gapE, vP, r->score1, prof->bias, maskLen);
-	} else {
-		vP = qP_word(read_reverse, prof->mat, r->read_end1 + 1, prof->n);
-		bests_reverse = sw_sse2_word(ref, 1, r->ref_end1 + 1, r->read_end1 + 1, weight_gapO, weight_gapE, vP, r->score1, maskLen);
-	}
-	free(vP);
-	free(read_reverse);
-	r->ref_begin1 = bests_reverse[0].ref;
-	r->read_begin1 = r->read_end1 - bests_reverse[0].read;
-	free(bests_reverse);
-	if ((7&flag) == 0 || ((2&flag) != 0 && r->score1 < filters) || ((4&flag) != 0 && (r->ref_end1 - r->ref_begin1 > filterd || r->read_end1 - r->read_begin1 > filterd))) goto end;
-
-	// Generate cigar.
-	refLen = r->ref_end1 - r->ref_begin1 + 1;
-	readLen = r->read_end1 - r->read_begin1 + 1;
-	band_width = abs(refLen - readLen) + 1;
-	path = banded_sw(ref + r->ref_begin1, prof->read + r->read_begin1, refLen, readLen, r->score1, weight_gapO, weight_gapE, band_width, prof->mat, prof->n);
-	if (path == 0) {
-		free(r);
-		r = NULL;
-	}
-	else {
-		r->cigar = path->seq;
-		r->cigarLen = path->length;
-		free(path);
-	}
-
-end:
-	return r;
-}
-
-void align_destroy (s_align* a) {
-	free(a->cigar);
-	free(a);
-}
-/*
-inline char cigar_int_to_op(uint32_t cigar_int) {
-	return UNLIKELY((cigar_int & 0xfU) > 8) ? 'M': MAPSTR[cigar_int & 0xfU];
-}
-
-
-inline uint32_t cigar_int_to_len (uint32_t cigar_int)
-{
-	return cigar_int >> BAM_CIGAR_SHIFT;
-}*/
diff --git a/src/ssw_cpp.cpp b/src/ssw_cpp.cpp
deleted file mode 100644
index b7018d3..0000000
--- a/src/ssw_cpp.cpp
+++ /dev/null
@@ -1,477 +0,0 @@
-#include "SeqLib/ssw_cpp.h"
-#include "SeqLib/ssw.h"
-
-#include <sstream>
-
-namespace {
-
-static const int8_t kBaseTranslation[128] = {
-    4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
-    4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
-    4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
-    4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
-  //   A     C            G
-    4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
-  //             T
-    4, 4, 4, 4,  3, 0, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
-  //   a     c            g
-    4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
-  //             t
-    4, 4, 4, 4,  3, 0, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
-};
-
-void BuildSwScoreMatrix(const uint8_t& match_score,
-                        const uint8_t& mismatch_penalty,
-			int8_t* matrix) {
-
-  // The score matrix looks like
-  //                 // A,  C,  G,  T,  N
-  //  score_matrix_ = { 2, -2, -2, -2, -2, // A
-  //                   -2,  2, -2, -2, -2, // C
-  //                   -2, -2,  2, -2, -2, // G
-  //                   -2, -2, -2,  2, -2, // T
-  //                   -2, -2, -2, -2, -2};// N
-
-  int id = 0;
-  for (int i = 0; i < 4; ++i) {
-    for (int j = 0; j < 4; ++j) {
-      matrix[id] = ((i == j) ? match_score : static_cast<int8_t>(-mismatch_penalty));
-      ++id;
-    }
-    matrix[id] = static_cast<int8_t>(-mismatch_penalty); // For N
-    ++id;
-  }
-
-  for (int i = 0; i < 5; ++i)
-    matrix[id++] = static_cast<int8_t>(-mismatch_penalty); // For N
-
-}
-
-void ConvertAlignment(const s_align& s_al,
-                      const int& query_len,
-                      StripedSmithWaterman::Alignment* al) {
-  al->sw_score           = s_al.score1;
-  al->sw_score_next_best = s_al.score2;
-  al->ref_begin          = s_al.ref_begin1;
-  al->ref_end            = s_al.ref_end1;
-  al->query_begin        = s_al.read_begin1;
-  al->query_end          = s_al.read_end1;
-  al->ref_end_next_best  = s_al.ref_end2;
-
-  al->cigar.clear();
-  al->cigar_string.clear();
-
-  if (s_al.cigarLen > 0) {
-    std::ostringstream cigar_string;
-    if (al->query_begin > 0) {
-      uint32_t cigar = to_cigar_int(al->query_begin, 'S');
-      al->cigar.push_back(cigar);
-      cigar_string << al->query_begin << 'S';
-    }
-
-    for (int i = 0; i < s_al.cigarLen; ++i) {
-      al->cigar.push_back(s_al.cigar[i]);
-      cigar_string << cigar_int_to_len(s_al.cigar[i]) << cigar_int_to_op(s_al.cigar[i]);
-    }
-
-    int end = query_len - al->query_end - 1;
-    if (end > 0) {
-      uint32_t cigar = to_cigar_int(end, 'S');
-      al->cigar.push_back(cigar);
-      cigar_string << end << 'S';
-    }
-
-    al->cigar_string = cigar_string.str();
-  } // end if
-}
-
-// @Function:
-//     Calculate the length of the previous cigar operator
-//     and store it in new_cigar and new_cigar_string.
-//     Clean up in_M (false), in_X (false), length_M (0), and length_X(0).
-void CleanPreviousMOperator(
-    bool* in_M,
-    bool* in_X,
-    uint32_t* length_M,
-    uint32_t* length_X,
-    std::vector<uint32_t>* new_cigar,
-    std::ostringstream* new_cigar_string) {
-  if (*in_M) {
-    uint32_t match = to_cigar_int(*length_M, '=');
-    new_cigar->push_back(match);
-    (*new_cigar_string) << *length_M << '=';
-  } else if (*in_X){ //in_X
-    uint32_t match = to_cigar_int(*length_X, 'X');
-    new_cigar->push_back(match);
-    (*new_cigar_string) << *length_X << 'X';
-  }
-
-  // Clean up
-  *in_M = false;
-  *in_X = false;
-  *length_M = 0;
-  *length_X = 0;
-}
-
-// @Function:
-//     1. Calculate the number of mismatches.
-//     2. Modify the cigar string:
-//         differentiate matches (M) and mismatches(X).
-//         Note that SSW does not differentiate matches and mismatches.
-// @Return:
-//     The number of mismatches.
-int CalculateNumberMismatch(
-    StripedSmithWaterman::Alignment* al,
-    int8_t const *ref,
-    int8_t const *query,
-    const int& query_len) {
-
-  ref   += al->ref_begin;
-  query += al->query_begin;
-  int mismatch_length = 0;
-
-  std::vector<uint32_t> new_cigar;
-  std::ostringstream new_cigar_string;
-
-  if (al->query_begin > 0) {
-    uint32_t cigar = to_cigar_int(al->query_begin, 'S');
-    new_cigar.push_back(cigar);
-    new_cigar_string << al->query_begin << 'S';
-  }
-
-  bool in_M = false; // the previous is match
-  bool in_X = false; // the previous is mismatch
-  uint32_t length_M = 0;
-  uint32_t length_X = 0;
-
-  for (unsigned int i = 0; i < al->cigar.size(); ++i) {
-    char op = cigar_int_to_op(al->cigar[i]);
-    uint32_t length = cigar_int_to_len(al->cigar[i]);
-    if (op == 'M') {
-      for (uint32_t j = 0; j < length; ++j) {
-	if (*ref != *query) {
-	  ++mismatch_length;
-          if (in_M) { // the previous is match; however the current one is mismatche
-	    uint32_t match = to_cigar_int(length_M, '=');
-	    new_cigar.push_back(match);
-	    new_cigar_string << length_M << '=';
-	  }
-	  length_M = 0;
-	  ++length_X;
-	  in_M = false;
-	  in_X = true;
-	} else { // *ref == *query
-	  if (in_X) { // the previous is mismatch; however the current one is matche
-	    uint32_t match = to_cigar_int(length_X, 'X');
-	    new_cigar.push_back(match);
-	    new_cigar_string << length_X << 'X';
-	  }
-	  ++length_M;
-	  length_X = 0;
-	  in_M = true;
-	  in_X = false;
-	} // end of if (*ref != *query)
-	++ref;
-	++query;
-      }
-    } else if (op == 'I') {
-      query += length;
-      mismatch_length += length;
-      CleanPreviousMOperator(&in_M, &in_X, &length_M, &length_X, &new_cigar, &new_cigar_string);
-      new_cigar.push_back(al->cigar[i]);
-      new_cigar_string << length << 'I';
-    } else if (op == 'D') {
-      ref += length;
-      mismatch_length += length;
-      CleanPreviousMOperator(&in_M, &in_X, &length_M, &length_X, &new_cigar, &new_cigar_string);
-      new_cigar.push_back(al->cigar[i]);
-      new_cigar_string << length << 'D';
-    }
-  }
-
-  CleanPreviousMOperator(&in_M, &in_X, &length_M, &length_X, &new_cigar, &new_cigar_string);
-
-  int end = query_len - al->query_end - 1;
-  if (end > 0) {
-    uint32_t cigar = to_cigar_int(end, 'S');
-    new_cigar.push_back(cigar);
-    new_cigar_string << end << 'S';
-  }
-
-  al->cigar_string.clear();
-  al->cigar.clear();
-  al->cigar_string = new_cigar_string.str();
-  al->cigar = new_cigar;
-
-  return mismatch_length;
-}
-
-void SetFlag(const StripedSmithWaterman::Filter& filter, uint8_t* flag) {
-  if (filter.report_begin_position) *flag |= 0x08;
-  if (filter.report_cigar) *flag |= 0x0f;
-}
-
-// http://www.cplusplus.com/faq/sequences/arrays/sizeof-array/#cpp
-template <typename T, size_t N>
-inline size_t SizeOfArray( const T(&)[ N ] )
-{
-  return N;
-}
-
-} // namespace
-
-
-
-namespace StripedSmithWaterman {
-
-Aligner::Aligner(void)
-    : score_matrix_(NULL)
-    , score_matrix_size_(5)
-    , translation_matrix_(NULL)
-    , match_score_(2)
-    , mismatch_penalty_(2)
-    , gap_opening_penalty_(3)
-    , gap_extending_penalty_(1)
-    , translated_reference_(NULL)
-    , reference_length_(0)
-{
-  BuildDefaultMatrix();
-}
-
-Aligner::Aligner(
-    const uint8_t& match_score,
-    const uint8_t& mismatch_penalty,
-    const uint8_t& gap_opening_penalty,
-    const uint8_t& gap_extending_penalty)
-
-    : score_matrix_(NULL)
-    , score_matrix_size_(5)
-    , translation_matrix_(NULL)
-    , match_score_(match_score)
-    , mismatch_penalty_(mismatch_penalty)
-    , gap_opening_penalty_(gap_opening_penalty)
-    , gap_extending_penalty_(gap_extending_penalty)
-    , translated_reference_(NULL)
-    , reference_length_(0)
-{
-  BuildDefaultMatrix();
-}
-
-Aligner::Aligner(const int8_t* score_matrix,
-                 const int&    score_matrix_size,
-	         const int8_t* translation_matrix,
-		 const int&    translation_matrix_size)
-
-    : score_matrix_(NULL)
-    , score_matrix_size_(score_matrix_size)
-    , translation_matrix_(NULL)
-    , match_score_(2)
-    , mismatch_penalty_(2)
-    , gap_opening_penalty_(3)
-    , gap_extending_penalty_(1)
-    , translated_reference_(NULL)
-    , reference_length_(0)
-{
-  score_matrix_ = new int8_t[score_matrix_size_ * score_matrix_size_];
-  memcpy(score_matrix_, score_matrix, sizeof(int8_t) * score_matrix_size_ * score_matrix_size_);
-  translation_matrix_ = new int8_t[translation_matrix_size];
-  memcpy(translation_matrix_, translation_matrix, sizeof(int8_t) * translation_matrix_size);
-}
-
-
-Aligner::~Aligner(void){
-  Clear();
-}
-
-int Aligner::SetReferenceSequence(const char* seq, const int& length) {
-
-  int len = 0;
-  if (translation_matrix_) {
-    // calculate the valid length
-    //int calculated_ref_length = static_cast<int>(strlen(seq));
-    //int valid_length = (calculated_ref_length > length)
-    //                   ? length : calculated_ref_length;
-    int valid_length = length;
-    // delete the current buffer
-    CleanReferenceSequence();
-    // allocate a new buffer
-    translated_reference_ = new int8_t[valid_length];
-
-    len = TranslateBase(seq, valid_length, translated_reference_);
-  } else {
-    // nothing
-  }
-
-  reference_length_ = len;
-  return len;
-
-
-}
-
-int Aligner::TranslateBase(const char* bases, const int& length,
-    int8_t* translated) const {
-
-  const char* ptr = bases;
-  int len = 0;
-  for (int i = 0; i < length; ++i) {
-    translated[i] = translation_matrix_[(int) *ptr];
-    ++ptr;
-    ++len;
-  }
-
-  return len;
-}
-
-
-bool Aligner::Align(const char* query, const Filter& filter,
-                    Alignment* alignment) const
-{
-  if (!translation_matrix_) return false;
-  if (reference_length_ == 0) return false;
-
-  int query_len = strlen(query);
-  if (query_len == 0) return false;
-  int8_t* translated_query = new int8_t[query_len];
-  TranslateBase(query, query_len, translated_query);
-
-  const int8_t score_size = 2;
-  s_profile* profile = ssw_init(translated_query, query_len, score_matrix_,
-                                score_matrix_size_, score_size);
-
-  uint8_t flag = 0;
-  SetFlag(filter, &flag);
-  s_align* s_al = ssw_align(profile, translated_reference_, reference_length_,
-                                 static_cast<int>(gap_opening_penalty_),
-				 static_cast<int>(gap_extending_penalty_),
-				 flag, filter.score_filter, filter.distance_filter, query_len);
-
-  alignment->Clear();
-  ConvertAlignment(*s_al, query_len, alignment);
-  alignment->mismatches = CalculateNumberMismatch(&*alignment, translated_reference_, translated_query, query_len);
-
-
-  // Free memory
-  delete [] translated_query;
-  align_destroy(s_al);
-  init_destroy(profile);
-
-  return true;
-}
-
-
-bool Aligner::Align(const char* query, const char* ref, const int& ref_len,
-                    const Filter& filter, Alignment* alignment) const
-{
-  if (!translation_matrix_) return false;
-
-  int query_len = strlen(query);
-  if (query_len == 0) return false;
-  int8_t* translated_query = new int8_t[query_len];
-  TranslateBase(query, query_len, translated_query);
-
-  // calculate the valid length
-  //int calculated_ref_length = static_cast<int>(strlen(ref));
-  //int valid_ref_len = (calculated_ref_length > ref_len)
-  //                    ? ref_len : calculated_ref_length;
-  int valid_ref_len = ref_len;
-  int8_t* translated_ref = new int8_t[valid_ref_len];
-  TranslateBase(ref, valid_ref_len, translated_ref);
-
-
-  const int8_t score_size = 2;
-  s_profile* profile = ssw_init(translated_query, query_len, score_matrix_,
-                                score_matrix_size_, score_size);
-
-  uint8_t flag = 0;
-  SetFlag(filter, &flag);
-  s_align* s_al = ssw_align(profile, translated_ref, valid_ref_len,
-                                 static_cast<int>(gap_opening_penalty_),
-				 static_cast<int>(gap_extending_penalty_),
-				 flag, filter.score_filter, filter.distance_filter, query_len);
-
-  alignment->Clear();
-  ConvertAlignment(*s_al, query_len, alignment);
-  alignment->mismatches = CalculateNumberMismatch(&*alignment, translated_ref, translated_query, query_len);
-
-  // Free memory
-  delete [] translated_query;
-  delete [] translated_ref;
-  align_destroy(s_al);
-  init_destroy(profile);
-
-  return true;
-}
-
-void Aligner::Clear(void) {
-  ClearMatrices();
-  CleanReferenceSequence();
-}
-
-void Aligner::SetAllDefault(void) {
-  score_matrix_size_     = 5;
-  match_score_           = 2;
-  mismatch_penalty_      = 2;
-  gap_opening_penalty_   = 3;
-  gap_extending_penalty_ = 1;
-  reference_length_      = 0;
-}
-
-bool Aligner::ReBuild(void) {
-  if (translation_matrix_) return false;
-
-  SetAllDefault();
-  BuildDefaultMatrix();
-
-  return true;
-}
-
-bool Aligner::ReBuild(
-    const uint8_t& match_score,
-    const uint8_t& mismatch_penalty,
-    const uint8_t& gap_opening_penalty,
-    const uint8_t& gap_extending_penalty) {
-  if (translation_matrix_) return false;
-
-  SetAllDefault();
-
-  match_score_           = match_score;
-  mismatch_penalty_      = mismatch_penalty;
-  gap_opening_penalty_   = gap_opening_penalty;
-  gap_extending_penalty_ = gap_extending_penalty;
-
-  BuildDefaultMatrix();
-
-  return true;
-}
-
-bool Aligner::ReBuild(
-    const int8_t* score_matrix,
-    const int&    score_matrix_size,
-    const int8_t* translation_matrix,
-    const int&    translation_matrix_size) {
-
-  ClearMatrices();
-  score_matrix_ = new int8_t[score_matrix_size_ * score_matrix_size_];
-  memcpy(score_matrix_, score_matrix, sizeof(int8_t) * score_matrix_size_ * score_matrix_size_);
-  translation_matrix_ = new int8_t[translation_matrix_size];
-  memcpy(translation_matrix_, translation_matrix, sizeof(int8_t) * translation_matrix_size);
-
-  return true;
-}
-
-void Aligner::BuildDefaultMatrix(void) {
-  ClearMatrices();
-  score_matrix_ = new int8_t[score_matrix_size_ * score_matrix_size_];
-  BuildSwScoreMatrix(match_score_, mismatch_penalty_, score_matrix_);
-  translation_matrix_ = new int8_t[SizeOfArray(kBaseTranslation)];
-  memcpy(translation_matrix_, kBaseTranslation, sizeof(int8_t) * SizeOfArray(kBaseTranslation));
-}
-
-void Aligner::ClearMatrices(void) {
-  delete [] score_matrix_;
-  score_matrix_ = NULL;
-
-  delete [] translation_matrix_;
-  translation_matrix_ = NULL;
-}
-} // namespace StripedSmithWaterman

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libseqlib.git



More information about the debian-med-commit mailing list