[med-svn] [libssw] 01/03: Imported Upstream version 0.0~git20160511.7d84de2

Sascha Steinbiss satta at debian.org
Fri Jun 24 09:54:16 UTC 2016


This is an automated email from the git hooks/post-receive script.

satta pushed a commit to branch master
in repository libssw.

commit f4848961a721b52d1211b733ac389c601df47103
Author: Sascha Steinbiss <satta at debian.org>
Date:   Thu Jun 23 14:28:25 2016 +0000

    Imported Upstream version 0.0~git20160511.7d84de2
---
 README.md                    | 227 +++++++++++
 demo/100k.fa                 |   3 +
 demo/100k_illumina1.fastq.gz | Bin 0 -> 6081335 bytes
 demo/10M.fa                  |   3 +
 demo/10k.fa                  |   2 +
 demo/1M.fa                   |   3 +
 demo/1k.fa                   |   3 +
 demo/54mer_hap1_1.100.fa     | 200 ++++++++++
 demo/54mer_hap1_1.100.fastq  | 400 ++++++++++++++++++++
 demo/Virus_genome.fa.gz      | Bin 0 -> 822 bytes
 demo/ref.fa                  |  15 +
 demo/test.seq                |  12 +
 src/.gitignore               |  12 +
 src/Makefile                 |  53 +++
 src/example.c                | 156 ++++++++
 src/example.cpp              |  53 +++
 src/kseq.h                   | 223 +++++++++++
 src/main.c                   | 465 +++++++++++++++++++++++
 src/pyssw.py                 | 384 +++++++++++++++++++
 src/ssw.c                    | 884 +++++++++++++++++++++++++++++++++++++++++++
 src/ssw.h                    | 188 +++++++++
 src/ssw/Aligner.java         | 161 ++++++++
 src/ssw/Alignment.java       |  66 ++++
 src/ssw/Example.java         |  39 ++
 src/ssw_cpp.cpp              | 477 +++++++++++++++++++++++
 src/ssw_cpp.h                | 219 +++++++++++
 src/ssw_lib.py               | 226 +++++++++++
 src/sswjni.c                 |  61 +++
 28 files changed, 4535 insertions(+)

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..83e67b7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,227 @@
+#SSW Library: An SIMD Smith-Waterman C/C++/Python/Java Library for Use in Genomic Applications
+
+License: MIT
+
+Copyright (c) 2012-2015 Boston College
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+Author:  
+C implementation: Mengyao Zhao  
+C++ wrapper: Wan-Ping Lee  
+Python wrapper: Yongan Zhao  
+Java wrapper: Daniel Cameron
+
+Contact:  
+Mengyao Zhao <zhaomengyao at gmail.com>  
+Wan-Ping Lee <wanping.lee at gmail.com>  
+Yongan Zhao <zhaoyanswill at gmail.com>   
+Daniel Cameron <cameron.d at wehi.edu.au>  
+
+Last revision: 04/05/2016
+
+##Overview
+SSW is a fast implementation of the Smith-Waterman algorithm, which uses the Single-Instruction Multiple-Data (SIMD) instructions to parallelize the algorithm at the instruction level. SSW library provides an API that can be flexibly used by programs written in C, C++ and other languages. We also provide a software that can do protein and genome alignment directly. Current version of our implementation is ~50 times faster than an ordinary Smith-Waterman. It can return the Smith-Waterman  [...]
+
+Note: When SSW open a gap, the gap open penalty alone is applied.
+
+## C/C++ interface
+
+###How to use the API
+The API files include ssw.h and ssw.c, which can be directly used by any C or C++ program. For the C++ users who are more comfortable to use a C++ style interface, an additional C++ wrapper is provided with the file ssw_cpp.cpp and ssw_cpp.h.
+
+To use the C style API, please:
+
+1. Download ssw.h and ssw.c, and put them in the same folder of your own program files.
+2. Write #include "ssw.h" into your file that will call the API functions.
+3. The API files are ready to be compiled together with your own C/C++ files.
+
+The API function descriptions are in the file ssw.h. One simple example of the API usage is example.c. The Smith-Waterman penalties need to be integers. Small penalty numbers such as: match: 2, mismatch: -1, gap open (the total penalty when one gap is opened): -3, gap extension: -1 are recommended, which will lead to shorter running time.  
+
+To use the C++ style API, please:
+
+1. Download ssw.h, ssw.c, ssw_cpp.cpp and ssw_cpp.h and put them in the same folder of your own program files.
+2. Write #include "ssw_cpp.h" into your file that will call the API functions.
+3. The API files are ready to be compiled together with your own C/C++ files.
+
+The API function descriptions are in the file ssw_cpp.h. A simple example of using the C++ API is example.cpp.
+
+###Speed and memory usage of the API
+Test data set: 
+Target sequence: reference genome of E. coli strain 536 (4,938,920 nucleotides) from NCBI
+Query sequences: 1000 reads of Ion Torrent sequenced E. coli strain DH10B (C23-140, 318 PGM Run, 11/2011), read length: ~25-540 bp, most reads are ~200 bp
+
+CPU time:
+
+* AMD CPU: default penalties: ~880 seconds; -m1 -x3 -o5 -e2: ~460 seconds
+* Intel CPU: default penalties: ~960 seconds; -m1 -x3 -o5 -e2: ~500 seconds 
+
+Memory usage: ~40MB
+ 
+###Install the software
+
+1. Download the software from https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library.
+2. cd src
+3. make
+4. The executable file will be ssw_test.
+
+###Run the software
+```
+Usage: ssw_test [options] ... <target.fasta> <query.fasta>(or <query.fastq>)
+Options:
+	-m N	N is a positive integer for weight match in genome sequence alignment. [default: 2]
+	-x N	N is a positive integer. -N will be used as weight mismatch in genome sequence alignment. [default: 2]
+	-o N	N is a positive integer. -N will be used as the weight for the gap opening. [default: 3]
+	-e N	N is a positive integer. -N will be used as the weight for the gap extension. [default: 1]
+	-p	Do protein sequence alignment. Without this option, the ssw_test will do genome sequence alignment.
+	-a FILE	FILE is either the Blosum or Pam weight matrix. [default: Blosum50]
+	-c	Return the alignment path.
+	-f N	N is a positive integer. Only output the alignments with the Smith-Waterman score >= N.
+	-r	The best alignment will be picked between the original read alignment and the reverse complement read alignment.
+	-s	Output in SAM format. [default: no header]
+	-h	If -s is used, include header in SAM output.
+```
+
+###Software input
+The input files can be in FASTA or FASTQ format. Both target and query files can contain multiple sequences. Each sequence in the query file will be aligned with all sequences in the target file. If your target file has N sequences and your query file has M sequences, the results will have M*N alignments.
+
+###Software output
+The software can output SAM format or BLAST like format results. 
+
+1. SAM format output:
+
+Example:
+```
+ at HD VN:1.4  SO:queryname
+ at SQ SN:chr1 LN:1001
+6:163296599:F:198;None;None/1   0   chr1    453 5   3M2D3M1D4M2D6M1D5M1D5M2I7M  *   0   0   CCAGCCCAAAATCTGTTTTAATGGTGGATTTGTGT *   AS:i:37 NM:i:11 ZS:i:28
+3:153409880:F:224;None;3,153410143,G,A/1    0   chr1    523 4   2M1D32M1D3M1D6M1D8M *   0   0   GAAGAGTTAATTTAAGTCACTTCAAACAGATTACGTATCTTTTTTTTCCCT *   AS:i:42 NM:i:16 ZS:i:41
+Y:26750420:R:-132;None;None/1   0   chr1    120 4   2M1I4M3D3M1I7M2I9M2D6M1I8M  *   0   0   AACAACAGAAGTTAATTAGCTTCAAAAATACTTTATATTTGCAA    *   AS:i:32 NM:i:16 ZS:i:29
+13:91170622:R:-276;None;None/1  0   chr1    302 4   8M1D8M1D3M2D6M1D4M2I2M1D2M3D5M1I4M  *   0   0   CATTTATTGTTGTTTTTAAAGATTAAATGATTAAATGTTTCAAAA   *   AS:i:32 NM:i:18 ZS:i:30
+15:37079528:R:-240;None;None/1  0   chr1    4   5   4M2D4M1D9M1I3M4I16M1I3M1D4M2D5M *   0   0   ACAGTGATGCCAAGCCAGTGGGTTTTAGCTTGTGGAGTTCCATAGGAGCGATGC  *   AS:i:30 NM:i:22 ZS:i:23
+9:92308501:R:-176;None;None/1   0   chr1    142 4   4M3I5M4D10M2D4M1I2M2I6M5D1M1D6M2D3M *   0   0   AATAACCATAAAAATGGGCAAAGCAGCCTTCAGGGCTGCTGTTTCTA *   AS:i:26 NM:i:25 ZS:i:26
+...
+```
+
+What is the output?
+Please check the document "The SAM Format Specification" at: http://samtools.sourceforge.net/SAM1.pdf for the full description.
+The additional optional field "ZS" indicates the suboptimal alignment score. For example, the 1st record in the upper example means the optimal alignment score of the given sequence is 37; the suboptimal alignment score is 28; the mismatch and INDEL base count within the aligned fragment of the read is 11.
+
+2. An example of the BLAST like output:
+
+```
+target_name: chr1
+query_name: 6:163296599:F:198;None;None/1
+optimal_alignment_score: 37	sub-optimal_alignment_score: 28	strand: +	target_begin: 453	target_end: 492	query_begin: 17	query_end: 51
+
+Target:      453    CCAATGCCACAAAACATCTGTCTCTAACTGGTG--TGTGTGT    492
+                    |||  ||| ||||  |||||| | ||| |||||  |*|||||
+Query:        17    CCA--GCC-CAAA--ATCTGT-TTTAA-TGGTGGATTTGTGT    51
+
+target_name: chr1
+query_name: 3:153409880:F:224;None;3,153410143,G,A/1
+optimal_alignment_score: 42	sub-optimal_alignment_score: 41	strand: +	target_begin: 523	target_end: 577	query_begin: 3	query_end: 53
+
+Target:      523    GAGAGAGAAAATTTCACTCCCTCCATAAATCTCACAGTATTCTTTTCTTTTTCCT    577
+                    || ||||**|||||*|*||*||*||*|*|**|*|| ||| |||||| ||||*|||
+Query:         3    GA-AGAGTTAATTTAAGTCACTTCAAACAGATTAC-GTA-TCTTTT-TTTTCCCT    53
+...
+```
+
+##Python interface
+
+###How to use the python wrapper ssw_lib.py
+
+ssw_lib.py is a Python wrapper that fully supports APIs of the C library. To use this Python library, C programming knowledge is not required.
+
+To use the python wrapper, please:
+
+1. Compile the src folder by either using the makefile or by compiling a dynamic shared library with gcc  
+```gcc -Wall -O3 -pipe -fPIC -shared -rdynamic -o libssw.so ssw.c ssw.h```
+2. Put libssw.so and ssw_lib.py in the same directory of your own program files or directories in sys.paths.
+3. The LD_LIBRARY_PATH environment variable may need to be modified to include the directory of the dynamic library libssw.so by one of the two following mathods:
+    * ```export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:path_of_libssw.so```
+    * For a definitive inclusion edit /etc/ld.so.conf and add the path of the libssw.so. Then, update the cache by ```/sbin/ldconfig```.
+4. In a python script or in a interactive interpreter, import the CSsw class by: ```from ssw_lib import CSsw``` or ```import ssw_lib``` and then call ```ssw_lib.CSsw```
+5. Call APIs with input parameters and parse the results (Please see pyssw.py as an example).
+
+###Run pyssw standalone 
+```
+usage: pyssw.py [-h] [-l SLIBPATH] [-m NMATCH] [-x NMISMATCH] [-o NOPEN]
+                [-e NEXT] [-p] [-a SMATRIX] [-c] [-f NTHR] [-r] [-s] [-header]
+                [target] [query]
+
+positional arguments:
+  target                targe file
+  query                 query file
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -l SLIBPATH, --sLibPath SLIBPATH
+                        path of libssw.so
+  -m NMATCH, --nMatch NMATCH
+                        a positive integer as the score for a match in genome
+                        sequence alignment. [default: 2]
+  -x NMISMATCH, --nMismatch NMISMATCH
+                        a positive integer as the score for a mismatch in
+                        genome sequence alignment. [default: 2]
+  -o NOPEN, --nOpen NOPEN
+                        a positive integer as the penalty for the gap opening
+                        in genome sequence alignment. [default: 3]
+  -e NEXT, --nExt NEXT  a positive integer as the penalty for the gap
+                        extension in genome sequence alignment. [default: 1]
+  -p, --bProtien        Do protein sequence alignment. Without this option,
+                        the ssw_test will do genome sequence alignment.
+                        [default: False]
+  -a SMATRIX, --sMatrix SMATRIX
+                        a file for either Blosum or Pam weight matrix.
+                        [default: Blosum50]
+  -c, --bPath           Return the alignment path. [default: False]
+  -f NTHR, --nThr NTHR  a positive integer. Only output the alignments with
+                        the Smith-Waterman score >= N.
+  -r, --bBest           The best alignment will be picked between the original
+                        read alignment and the reverse complement read
+                        alignment. [default: False]
+  -s, --bSam            Output in SAM format. [default: no header]
+  -header, --bHeader    If -s is used, include header in SAM output.
+```
+
+###pyssw output
+
+The software can output SAM format or BLAST like format results. 
+
+###Speed and memory usage of pyssw
+
+The speed and memory are about the same as the c library.
+	
+##Java interface
+
+The java wrapper is a thin JNI (Java Native Interface) wrapper around the native C implementation.
+
+###Building
+
+Only the C, C++ and C shared libraries are generated from the default make goal, and as such, the java interface must be built explicitly.
+
+1. Ensure `javac` and `jar` are on path
+2. Ensure JAVA_HOME is set to an installed JRE or JDK, or the JNI include directory is included in the C system library search path
+3. `make java` from the src directory.
+4. libsswjni.so and ssw.jar should be built.
+
+###How to use the java wrapper
+
+The java wrapper consist of the following components:
+
+* libsswjni.so: native C library exposing the JNI entry points
+* ssw.jar is a java library containing the Java interface to the native C library. This small wrapper library is composed of:
+    * ssw.Aligner java class: a thread-safe static class that exposes two `align()` methods. The first exposes the SSW C library directly. No error checking is performed on arguments passed to this method and misuse is highly likely to crash the JVM. The second align() method is a more user-friendly entry point that exposes a simpler API and performs some basic error checking.
+    * ssw.Alignment java class: this class stores alignment results. Each each field has a direct correspondence and identical meaning to the C s_align struct.
+    * ssw.Example java class: java version of the example_c sample code. Run `java -jar ssw.jar` to execute the sample.
+
+To use the library, either reference the ssw.jar or including the Aligner and Alignment classes directly. As for any JNI library, the native library must be loaded (using `System.loadLibrary("sswjni")` or similar) before invokation of native methods. For the JVM to find the library, ensure that either the library is included in the LD_LIBRARY_PATH environment variable, or `-Djava.library.path=<directory containing libsswjni.so>` is set on the java command line.
+
+###Please cite this paper, if you need:
+http://dx.plos.org/10.1371/journal.pone.0082138
diff --git a/demo/100k.fa b/demo/100k.fa
new file mode 100644
index 0000000..776b683
--- /dev/null
+++ b/demo/100k.fa
@@ -0,0 +1,3 @@
+>chr4	50000	150000
+GGACCCCGCTCTGTAATGTAAAATGTGGGGACCCCACTCTCTAATGTAAAATGTGAGCTGCCAGCTGTGGGCTGTGCCTCAGTGGTAGATGGCAGGGTTTGGGAGAGGACACAGGCCACCAGGAGAGGGCAAGCAGAATTGCTGCAGCCCAGGGCTTAGTGAGTAGGGATCCATTGCTTTGAAATATAAATAGCCGGCCGGGCGCGGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCAAGGCGGGCAGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACAGTGAAATCCCGTCTCTACTAAAAATACAAAAAAATTAGCCAGGTGTGGTGGCGGGCACCTGTAGTCCCAGCTACTGGGGAGGCTGAGTCAGGAGAATGGCGTGAACCCGGGAGGCAGAGCTTGCAGTAAGCCAAGATCGCACCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAAAAAGAAAAAAA [...]
+
diff --git a/demo/100k_illumina1.fastq.gz b/demo/100k_illumina1.fastq.gz
new file mode 100644
index 0000000..547aad2
Binary files /dev/null and b/demo/100k_illumina1.fastq.gz differ
diff --git a/demo/10M.fa b/demo/10M.fa
new file mode 100644
index 0000000..33f24a5
--- /dev/null
+++ b/demo/10M.fa
@@ -0,0 +1,3 @@
+>chr2	50000	10050000
+ACCCAAAAGAAAATAAATCATTCTAACAAAAAGACATATGCATGCATGTAGTCATTGCAGCACTATTCATAATAACAAAGACATGGAATCAATGAAGATGCCCATCAGTGGTGGACTGGGTAAAGAAAATGTGCTACATATACACCATAGAATACTATGCAGCCATTAAAAAAGAGTGAAATCATGTCCTTTGCAGCAACATGCATTTAGCTAGAGGCCATTATCCTAAGCAAACTAATTCAGGAAGAGAAAACCAGATCCCACATATAAGTGGGAGCTAAACATTGAGTACATATAGACACAGAGAACGGAACAATAGATACTGGGGACTGCTTGATCGTGGGAGGGTGAGGAGAGGGCATAGGTTGGAAGGCTATCTGTGGGTACTATGCTCACTATCCTAGTGATGGGATCATTTGTACACCAAGCATCAGTGACCCACAATCTACCCATGTAACAAACCTGCACATGTACTCACTGAACCTAAATTTA [...]
+
diff --git a/demo/10k.fa b/demo/10k.fa
new file mode 100644
index 0000000..4c6dbde
--- /dev/null
+++ b/demo/10k.fa
@@ -0,0 +1,2 @@
+>chr5	50000	60000
+ATAAACCACATATTAATTGAGGTAAAATAAGTGGCCAGAGAACCCACATAATTTAGTTGCAGTAAACTTCTGCTGCATATTTAAAGGAAAATAAACGAAATAATAGTTTTCAAAACATAAAAATTATTCCACTCTTTCTGAAAACACACTGCTAATCTAAGCCTAACCAAAAAGCTAACCCTAACCCTACCACTAACCCTAATGCTACCACTAGCCTCTAACCCTACTGCTAACTCTAACACCTAACCCTAATCCCAAACCCCTAAACCAAACCCTAGTCCTGAACCCTAACCCTAACCCCAACCCAAACCCCAATACCGACACTACACTAATCATAACCCAACCCTAACTCTAACCGCAAAACCCTAACCCCTGACCCTAAACCCAACCCCAACCCAAAACCTAACTCTAACCCCTGAACCAAACCCTAAAACCCCACAAACCCCAAAAATATTCCAAACCGAACCTCACCCTAACCATAAATCCCAACCC [...]
diff --git a/demo/1M.fa b/demo/1M.fa
new file mode 100644
index 0000000..111bb53
--- /dev/null
+++ b/demo/1M.fa
@@ -0,0 +1,3 @@
+>chr3	50000	1050000
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN [...]
+
diff --git a/demo/1k.fa b/demo/1k.fa
new file mode 100644
index 0000000..42c353e
--- /dev/null
+++ b/demo/1k.fa
@@ -0,0 +1,3 @@
+>chr1	50000	51000
+TAAACAGGTTAATCGCCACGACATAGTAGTATTTAGAGTTACTAGTAAGCCTGATGCCACTACACAATTCTAGCTTTTCTCTTTAGGATGATTGTTTCATTCAGTCTTATCTCTTTTAGAAAACATAGGAAAAAATTATTTAATAATAAAATTTAATTGGCAAAATGAAGGTATGGCTTATAAGAGTGTTTTCCTATTGTTTTCAGTGTAGGACTCACTGTTCTAAATAACTGGGACACCCAAGGATTCTGTAAAATGCCATCCAGTTATCATTTATATTCCCTAACTCAAAATTCATTCACATGTATTCATTTTTTTCTAAACAAATTAGCATGTAGAATTCTGGTTAAAATTTGGCATAGAACACCCGGGTATTTTTTCATAATGCACCCAATAACTGTCATTCACTAATTGAGAATGGTGATTTAACAAAGGATAATAAAGTTATGAAACCAATGCCACAAAACATCTGTCTCTAACTGGTGTGTGTGT [...]
+
diff --git a/demo/54mer_hap1_1.100.fa b/demo/54mer_hap1_1.100.fa
new file mode 100644
index 0000000..61c83b5
--- /dev/null
+++ b/demo/54mer_hap1_1.100.fa
@@ -0,0 +1,200 @@
+>6:163296599:F:198;None;None/1
+GTGTGAGCCACCACGCCCAGCCCAAAATCTGTTTTAATGGTGGATTTGTGTTTC
+>3:153409880:F:224;None;3,153410143,G,A/1
+AGGAAGAGTTAATTTAAGTCACTTCAAACAGATTACGTATCTTTTTTTTCCCTC
+>Y:26750420:R:-132;None;None/1
+AACAACAGAAGTTAATTAGCTTCAAAAATACTTTATATTTGCAATTAAAAATAA
+>13:91170622:R:-276;None;None/1
+AAGCATTTATTGTTGTTTTTAAAGATTAAATGATTAAATGTTTCAAAAGGGACT
+>15:37079528:R:-240;None;None/1
+ACAGTGATGCCAAGCCAGTGGGTTTTAGCTTGTGGAGTTCCATAGGAGCGATGC
+>9:92308501:R:-176;None;None/1
+TCAAGAAATAACCATAAAAATGGGCAAAGCAGCCTTCAGGGCTGCTGTTTCTAC
+>3:109533106:R:-128;None;None/1
+ACTCCAAAGGGCATTTTGTGTCCTAGTGGAAAAATGTCACTGTGTTTTATAACG
+>16:32366683:R:-158;16,32366730,G,A;None/1
+ATGAGATTGGTCTGGGCAATGATTTTCTGGATATGACCCCCAAAGCACAGGCTA
+>13:111809970:R:-237;None;None/1
+TGAGGAGGTGGAAGGACACGTTAGTGGAGATCCTGGGCCTGAGGAGAACTAGCA
+>3:38871305:F:159;None;None/1
+GGAGAAGCAACAGGAACCTTGAAGGTACTCCCAGTCTCCAGCTTGGGCCCTGGA
+>19:8434785:F:295;None;None/1
+CCTTCTAATATCTGTGATTTGTCTGAGGTCTTGTTGTCTCAGGGACTCTCAAAC
+>8:132563397:R:-144;None;None/1
+TAAATTTTAAAGTCCCACTTGACCTCTGTCTCTAAACACAGCCTTTTCCTAAAC
+>3:190507952:R:-57;3,190507958,G,A;None/1
+ATCATTCATGGGTGAAAAAAGCCTGACTACCTATCTCGGTCAGTTATTAGAAGA
+>10:59883855:R:-120;None;None/1
+GAGTAGTAAATGAGGTACATGTCTAGTACAGTGCTTATTATATTATAATGGGCA
+>6:148630669:R:-271;None;None/1
+ATTGTTTCTCATAATTGCTTGCTGTTACGAGTCAGCTAAGGCTGCCTGTTGAGA
+>3:64518566:R:-93;None;None/1
+ACTTTCAGATATAGGGTAGAATAATTGGACAAGAGAAAAGTGCTCTATCCACAC
+>14:42514746:R:-187;None;None/1
+TTGAATAAAGAAAATGTGGTATACATACATACATACACCATAGAATAGCAAACT
+>7:126497810:R:-191;None;None/1
+ATCATGATTTATGGTGACTTGAAGCGTCTTTGCTTGGACAAGGTTTAGGAAGAG
+>21:15123085:R:-140;None;None/1
+ACGACTACTTAAGAATTTACAATAGTTGAAATTCCATTAAAGAGTTTATATGGC
+>2:217172401:R:-226;None;None/1
+AGTGGGAGAAGGTCAGAGAGTGAATTCCAAGGTGCTACATTTGTAGCATGAGGG
+>6:129456169:R:-167;None;None/1
+GCTCACCTATACCCATGTTAGTTGAGTCAGGAAACCACAATCAGTTTCTAGAAG
+>4:160333309:R:-131;None;4,160333230,T,G/1
+TAAAAGTAGGCTCTAAGGCTCTAATTAACTTGATATTTTTATGACCACTGACTA
+>3:37890126:R:-228;None;None/1
+CTAGTCTTCCTTAAAATACTTAATTTGGGTAGGTCTAAAAATCAGGGGACCTCT
+>17:49899522:F:154;None;None/1
+ATTTGTTAAAATGTGGATTTAGTTTTCATTATTTCTCGAAGATTCAAATACCTA
+>8:138662818:R:-211;None;8,138662609,C,T/1
+GAATAAGTGAAGAGTGATGTGCCGAGATCAGAAAGCATTAAACAGTCTTATCAC
+>3:97260940:R:-254;None;None/1
+ACATGGTTTGAACGTATCCCTGCGGAAATCACACCTCGGAGGGTAACAATGAGC
+>7:14401638:F:350;None;None/1
+TTAAAGATATGGGTAATCATTTATGGTTGCTTATTATAAGTTGATGTTGAAACA
+>13:19068477:F:246;None;None/1
+AGCACCTGTGTAACTTGGCCTCTCTAACCCTTGGTTTCTTCCCCAACCTCATTC
+>3:179655047:F:206;None;None/1
+AGTTTTTACCTCCTTCTTCTAAAATAAATAAGTCTTGATAGCCAATCCCATATC
+>20:15001091:F:174;None;None/1
+TGGGTGAACCAATTGAATAAAACTCTTTGCAACAATTCTTATGGGTAGAAACAG
+>2:54967337:R:-153;None;None/1
+ATCACTCTATTCTACAAAGAAAATATACCCAGTCAGCACAGGAAAACATGCTCA
+>18:39435195:F:202;None;None/1
+CATCATTCCTGACTCTGTATTTGTATGCTTTCAATAAGCAATCATTTTCCAAAA
+>X:68133790:F:167;None;None/1
+TGGTGTACGGCTACCTGGCTTTGATTCTACCACCTGTTCCCTGCCAGCTGTGTG
+>9:18241281:F:267;None;None/1
+ATAAAAACAGGAAAAATGATTAATAACATATAAAATAGTTTTCTGAAGACGGTG
+>7:141812631:F:239;None;None/1
+TCCAAGCAAATGTGTTTTCCCAGTCAGGATTAATGACCAGTACTACCCCGTACC
+>15:54133744:R:-269;None;None/1
+CTTAACTGCTCGGGAATAATCCTTGAGTGAACAATAGCTTGAATCTTCCTAAGA
+>4:66588719:R:-245;None;None/1
+CTTTCTATTAAAACAGATAAAAAAAAAATATATATATATATATATATGAAGGAA
+>1:217587184:F:229;None;None/1
+GCTATGCCCAGGGTTTTGATTAATTCACTTTTTGGCCATTCTTTTTCTTTTTCT
+>2:119213044:R:-184;None;None/1
+AGAGTTCCAGGAAGTTTTCTGCTCTGCAGGAGACTTGGATGTCCTGGGGAAAAT
+>2:36811349:F:265;None;None/1
+AAGAATGAGGATAAGCTGGACATTGAAGGGTGAGGAGGGCTAATACGAATTTGT
+>1:68786835:F:349;None;None/1
+TGTTTTGTGGCCTTCCCTGTTGATACCTCAATGTACAAGTCTAGGGTCTGGAGT
+>4:126634771:F:185;None;None/1
+TGATAGGATTCTCATAGGACAATTTCTAATCTGTGGTTCTGCTTGTAAATCTCT
+>4:114171402:R:-296;None;None/1
+CAGTTTTCAGCTCATCTTTTGAAATGTTTTAAACCTGTACTCATCTACGTCCTT
+>9:125206364:R:-140;None;None/1
+TGAGTATCCTTCTAAGGGCACCTAGAAATTCTCAGCCTAACATCCATCACGAAC
+>X:25208394:F:245;None;None/1
+CAAATGATGTGGGGAGAGACGCATCTTCTGACAGCTATTACAGTGATTTCTCGC
+>2:222217158:F:227;None;None/1
+TTTTCCTGAAATCATGAGTACATGCTTGCATATCCAACCGTGGAGGGCCTGGCG
+>1:196837088:F:230;None;None/1
+AGATTCAGATAATCTATGTTCATGCATAGGAGAATATACTCTAGAAACAGCAAG
+>7:24300836:R:-208;7,24300863,*,+1T;None/1
+GTTCATCAATCTAGCCTCTGCTACTACTCTCTCTGTTCCTGTCAAGTAGAGCTC
+>1:76948473:F:230;None;None/1
+GGTGATAGAGTGAAGATTTTACTAAGGAAATTAACACTTAAGATTTAATGGTTT
+>8:62727783:F:135;None;None/1
+AAAATTTGAGGTGCTAAATCCAAACACACATTAAATGATAAAACTTTTAAAAAG
+>7:94677617:R:-155;None;None/1
+ATAAACAAAAACAACCAAAATATCCTTTCAAATAAAAATAGAAACCAGAAATAA
+>4:10707991:F:305;None;None/1
+CTACTCAGGAGGCTGAGACAGGAGAATCGCTTGAACCCAGGAGGCGGTGGTTGT
+>1:183203391:R:-141;None;None/1
+CTCTAGTAATAGTAACTACTAGGATGACAATAGTAATTTTTATTATCTTTTACT
+>15:63526307:R:-83;None;None/1
+AATCGGGAGAATATTCTGATGTGCATCTTGTCCACTACTTTCAACACAACCTTG
+>8:79068236:F:137;None;None/1
+AGTTATGCAGTAAATCAAGCCACATATATATGAGTGATCCTTAAATGTATCCCT
+>9:1290219:R:-251;None;None/1
+TAATAGCTGGGGATTTTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG
+>5:122248402:R:-212;None;None/1
+TCTTGAGCTCTCCATTCAGAGACTACATACCTTCTGCTAGAACAGGGGCTTCTC
+>X:153897575:R:-263;None;None/1
+GCTCAAGCAATTCTCATGCCTCAGCCTCCTGAGTAGCTAGGACTATGGTCACAC
+>6:95453360:F:204;None;None/1
+AGTTGTGGTTTGTAATCGTCTGAAAAAAAGAGATAAAATTGAGCCTCTTTTATT
+>10:6951101:R:-212;None;None/1
+TGTTAACATGATGTGGTAGAAAGTCCCCAAGATGGTCCCCAGCAATCCTCATTA
+>13:59325230:F:98;None;None/1
+ATAAGAAACTTGTCTAAGATTATACCTATAACAAGCTGGGGTTTCAACTCCAGG
+>3:37059536:F:184;None;None/1
+GTTAAAAATCATACTACCTGTTGATATGGTTGTATATCCGCTTTTTTCATACGA
+>9:91156368:F:149;None;None/1
+GGGGCTGAACCCTCAAAGATTATGTTTCCCAGTTACCATTTCGTATCTTATGCC
+>12:55970708:R:-246;12,55970762,C,T;None/1
+AACCCAGGAGGCAGAGGTTGTGGTGAGCCAAGATCGTGCCATTGCACTCCAGCC
+>3:120828442:F:203;None;None/1
+GTGCTACTTCTAGAAGGGTGTTTTCATCATCTTCTAGCCCATTGGTAGGTCCAG
+>19:60104965:R:-147;None;None/1
+AGAGGCACCCATAACCACGACCAGCTAATTTTCGTATTTCTAATAGAGATGGTG
+>11:98782460:R:-46;None;None/1
+AATGAGCATAGGCAGTAGTTAGGGAGTAGTTGCAGCAAGCCCTGGGTGAGAATC
+>12:91111930:F:209;None;None/1
+TTGGCGGGGGGATTACTAAAATCTCTTAACCTCATAAGTTTGTCAGTGTAATTT
+>3:192601514:F:196;None;None/1
+TGGGTGGAGTGTTCTATATACTCACTCTCCTTGACTTATGTTGGGGTTATGTCT
+>X:47989386:R:-46;None;None/1
+GTTACCCTGGCTTAATGGTCAGCATTATGTCATCGTCCCACAGCATCGCAGACA
+>2:39824446:R:-127;None;None/1
+TGAAGGAATACTGGAGGCTAGAGAATTTGTAAAGAAAAGAGGTTTACTTGGCTC
+>1:227528596:F:219;None;None/1
+ATGAACAGGGGAAATGCAGCCATTTGAATTTAATCAGAGGAACAAAAGGGAGTC
+>X:4440324:R:-90;None;None/1
+AGTTTGCTACCGGATCCATAAGTCCAGCTTCTGCTCTTGTAGTAGACCAGATTC
+>X:73634281:F:88;None;None/1
+AATGTTTCCTTCTTCAAACCAGTGCCCGTGATTTAGCTTTTATTGACGTTGGTA
+>3:53909823:F:239;None;3,53910104,T,C/1
+ACATTGGTTAAAGTCAGGCACACTTTTTTTTATTAGTGTTCACTGTAATCTCCT
+>3:170059370:R:-173;None;3,170059212,G,C/1
+TACAATGTTACACAGCATGCTGTTGGCACTGTTTTAAGCAAACAAGTTATGCGC
+>20:8823533:F:251;None;None/1
+GTGGTGGATGTCTCAGCAGAGGCCTGCATAGAAAGACGACATTGAGGATCAGTG
+>5:106802036:F:200;None;None/1
+TTTTGCTCTCAGTTGTGTCCCAGCAACCATGAGCTGGAAATCCAGAGACTCCTT
+>5:74632980:R:-252;None;None/1
+CTATTTGAAAATGCGTTTCAAACATGTGTGTTCTCATGTATCGAGATCATTACT
+>12:84751900:F:268;None;None/1
+TAATCTTGCAAATAATTTTCACCTGTGGTCTCACCTCTGCTAAGGTGAAAAAAC
+>X:135811790:R:-164;None;None/1
+TTATTTATTTTTTATGTGATAATTACCTTTATAAAAAATTGTTGGGCGGGCATG
+>10:53108829:R:-138;None;None/1
+AAATAAGGGAGGAATAACATCTAATTGTCAGAGACCCTGTCCTCCTTCTTGGAT
+>11:83660431:R:-245;None;None/1
+TCTCTCTAATTATATTGTTACTTTTGGGGTAAATGTGAATAGGAAAGTGAGTTA
+>10:119650756:F:207;None;None/1
+AGAGCACACTGTCTTCCCCTTTTGTGCTCTTAGTATGTTTGCAGCGATCAACTC
+>2:154848222:R:-185;None;None/1
+TATATATAGTAAACAAATCCGAAATTAAAGATACAATTCCCTCAGTGTTTGGAA
+>3:63918850:F:276;None;None/1
+ACACTCCTGTGATCTTACAGCTTTCTCTCGTTGCACATATTACAAATCCTGTAA
+>10:11447534:F:239;None;None/1
+GTTGTGGTGTAGGGTCATGCACGGATTAAGAGCTGAGTTGTGGTGGAGGGTCAT
+>1:57651555:R:-316;None;None/1
+AGACTTGAAAGTCAGAATTACTCCTTGATCCATGGGCTGCAGAGTAGATGTTTT
+>X:91671697:R:-206;None;X,91671506,G,A/1
+ACTTCTTTGGTTCAAAGGTTTCTTTTCTTTTCATCAGAGTATTTTGAATCACTT
+>12:101622421:R:-187;None;None/1
+CATCAAAAGGCTATTGTTAATTACAGTTTTTGGAGCAGAAAAATCGGAAAAGCC
+>13:43163355:R:-200;None;None/1
+GAGAACAGAGGCAAATCAAAGACTTGTTTTGGTACTTACTTATCAAAAATAAGG
+>19:37349825:R:-160;None;None/1
+CACCATGTTGGCCAGGCTGGTCTCCAACTTCTCGGTTCAAGTGATCCATTTTCC
+>8:41173199:R:-175;None;8,41173056,T,C/1
+CTCAGAAATGTTAAGGAACTTTAATGACAATTATAAACACCTTGGTCTCCGCGC
+>16:29068055:R:-232;None;None/1
+CGACATACCCACAAAAAAAGAAATTTGATACACACCCAATAGGCTACTGGCCAC
+>1:4694340:F:179;None;None/1
+GTTAAGTGACTTGCCTGAGGTCACACAGCCTGAAGGGGGTGGAGTCAGGATTCA
+>17:76451446:F:221;None;None/1
+GCCAGAAGCTCAGAGGGATTCATGGGGAGGCCTTAAAGAAGGTGGGCTCGGGCA
+>1:242817485:F:158;None;None/1
+CATTTGTTTTCAAACAAAGCACGTATGTGCATTTCTTTAAAAAAGCAATTATGA
+>18:29611708:R:-146;None;18,29611585,G,A/1
+ACCACGATGCCCTACTCACACTCTGTGCTCCAGCAACAATACATTGCTTAAAAT
+>9:36493018:F:206;None;None/1
+AGTGAGCAGAGATTGCACCATTGCACTCCAGCCTGGGCAACAAGATTGAAACTC
+>9:16274705:F:129;9,16274709,C,A:9,16274719,C,G:9,16274741,A,T;9,16274862,A,G/1
+TTTCACATGCTGTTGTAGTGATAGGGAATAAGTCTCTCGAGATCTGACGGTTTT
diff --git a/demo/54mer_hap1_1.100.fastq b/demo/54mer_hap1_1.100.fastq
new file mode 100644
index 0000000..57c63cf
--- /dev/null
+++ b/demo/54mer_hap1_1.100.fastq
@@ -0,0 +1,400 @@
+ at 6:163296599:F:198;None;None/1
+GTGTGAGCCACCACGCCCAGCCCAAAATCTGTTTTAATGGTGGATTTGTGTTTC
++
+%/978679:99788::8888689788888888986637/%/*%--/11,%*/*%
+ at 3:153409880:F:224;None;3,153410143,G,A/1
+AGGAAGAGTTAATTTAAGTCACTTCAAACAGATTACGTATCTTTTTTTTCCCTC
++
+%-;;8;;9289.73697758:5,2984,54687/*8,'.001/550'+'.(9((
+ at Y:26750420:R:-132;None;None/1
+AACAACAGAAGTTAATTAGCTTCAAAAATACTTTATATTTGCAATTAAAAATAA
++
+E=>FED>D=FEEEFFAB>A=ABDEFEFA8AAE<=D>DC:AF==>C>=;F>F?:.
+ at 13:91170622:R:-276;None;None/1
+AAGCATTTATTGTTGTTTTTAAAGATTAAATGATTAAATGTTTCAAAAGGGACT
++
+>A:ABC@=ACCCB>8:6-9'5/4'65,)2+/:3,1,.&2&.331).522&&&)4
+ at 15:37079528:R:-240;None;None/1
+ACAGTGATGCCAAGCCAGTGGGTTTTAGCTTGTGGAGTTCCATAGGAGCGATGC
++
+ACB?;?=@??@BBA@::=@@?@B?>7?8.3?7A@<.7+7=@>)3033>43&&-6
+ at 9:92308501:R:-176;None;None/1
+TCAAGAAATAACCATAAAAATGGGCAAAGCAGCCTTCAGGGCTGCTGTTTCTAC
++
+A<CDBAEDEBAEBDD?A9B?EC9;<?<?704:;6;C97..0/1<8:710<227:
+ at 3:109533106:R:-128;None;None/1
+ACTCCAAAGGGCATTTTGTGTCCTAGTGGAAAAATGTCACTGTGTTTTATAACG
++
+FB@??ACE=7;;BDD at E/&72;/<-'45-&09+108,0)2=5>-4,6)-+''2&
+ at 16:32366683:R:-158;16,32366730,G,A;None/1
+ATGAGATTGGTCTGGGCAATGATTTTCTGGATATGACCCCCAAAGCACAGGCTA
++
+ at CCA>@=CCCB at AB?7>@:@>@>ACA>A=?9?;AA at 660&//4146200/&3'5
+ at 13:111809970:R:-237;None;None/1
+TGAGGAGGTGGAAGGACACGTTAGTGGAGATCCTGGGCCTGAGGAGAACTAGCA
++
+@@C>AEEEEEEEA<B=BB@?A>EEEB at BB<DDDD==??9&7<7<<=A?8?B<<4
+ at 3:38871305:F:159;None;None/1
+GGAGAAGCAACAGGAACCTTGAAGGTACTCCCAGTCTCCAGCTTGGGCCCTGGA
++
+%-64446643443442/234444632///2-&-1/-*++-++/101-+(++(+-
+ at 19:8434785:F:295;None;None/1
+CCTTCTAATATCTGTGATTTGTCTGAGGTCTTGTTGTCTCAGGGACTCTCAAAC
++
+%/8677799997469967788997687666995576655577405242/57527
+ at 8:132563397:R:-144;None;None/1
+TAAATTTTAAAGTCCCACTTGACCTCTGTCTCTAAACACAGCCTTTTCCTAAAC
++
+:>:<<66<:::=::=;::::<<::=<:=6=<:A::<?<=6:6;::<::>:6:<:
+ at 3:190507952:R:-57;3,190507958,G,A;None/1
+ATCATTCATGGGTGAAAAAAGCCTGACTACCTATCTCGGTCAGTTATTAGAAGA
++
+@@@A>:>077?==9:==(8/;;'++)2,),%%1.2%%%.%+),/+&%&%2,+%%
+ at 10:59883855:R:-120;None;None/1
+GAGTAGTAAATGAGGTACATGTCTAGTACAGTGCTTATTATATTATAATGGGCA
++
+ at C=DDDECE@<>A6A?59?16;=A:51'6-37563/&+172<<9<CA?1:+1+/
+ at 6:148630669:R:-271;None;None/1
+ATTGTTTCTCATAATTGCTTGCTGTTACGAGTCAGCTAAGGCTGCCTGTTGAGA
++
+?D at CE????6BC92:A9>A11;24788&3/537@<)/';(/'7-5&12;7;)((
+ at 3:64518566:R:-93;None;None/1
+ACTTTCAGATATAGGGTAGAATAATTGGACAAGAGAAAAGTGCTCTATCCACAC
++
+>??>><E?)&3A4+4B*<=>2;;BE77;CB=+E);;<>?EB;B:5,9>E)B<00
+ at 14:42514746:R:-187;None;None/1
+TTGAATAAAGAAAATGTGGTATACATACATACATACACCATAGAATAGCAAACT
++
+ at C=147=)7*C?3?CB>B=6=>=77&7<>=BC3.9>BCBBAC3CC:=.0;?41(
+ at 7:126497810:R:-191;None;None/1
+ATCATGATTTATGGTGACTTGAAGCGTCTTTGCTTGGACAAGGTTTAGGAAGAG
++
+EBBDEEEEEEB?BEA>@BDBBEDBA9<@A:9==?:53>-;?7<>=55>>=B=A?
+ at 21:15123085:R:-140;None;None/1
+ACGACTACTTAAGAATTTACAATAGTTGAAATTCCATTAAAGAGTTTATATGGC
++
+CCCBBBAABAB<A;@==BA?;>@?91=7?5:@?1==BB>5<AB5+237?85122
+ at 2:217172401:R:-226;None;None/1
+AGTGGGAGAAGGTCAGAGAGTGAATTCCAAGGTGCTACATTTGTAGCATGAGGG
++
+CAAAA@?@CCCA>A=BA?;?>CC=@A=B=;AC;47@=>:=6>>:;+6>>/)06@
+ at 6:129456169:R:-167;None;None/1
+GCTCACCTATACCCATGTTAGTTGAGTCAGGAAACCACAATCAGTTTCTAGAAG
++
+ at BBCBB=AB573;426/384452&.6.3&14-2102.&241+672685::4,&)
+ at 4:160333309:R:-131;None;4,160333230,T,G/1
+TAAAAGTAGGCTCTAAGGCTCTAATTAACTTGATATTTTTATGACCACTGACTA
++
+BDDDDD>DDDDBC<@BD at DD>7AB at C;;:685=877:778:66662,4112)&-
+ at 3:37890126:R:-228;None;None/1
+CTAGTCTTCCTTAAAATACTTAATTTGGGTAGGTCTAAAAATCAGGGGACCTCT
++
+BACABAB<AB?>BC=BA<3<<:A>:28(954-6B8>70<44570038.67&/9)
+ at 17:49899522:F:154;None;None/1
+ATTTGTTAAAATGTGGATTTAGTTTTCATTATTTCTCGAAGATTCAAATACCTA
++
+C<AAC@;;>2>4=>A9<8:,9:3;=,*6%7*/00.(,44449*%+%4+%,(%*,
+ at 8:138662818:R:-211;None;8,138662609,C,T/1
+GAATAAGTGAAGAGTGATGTGCCGAGATCAGAAAGCATTAAACAGTCTTATCAC
++
+BBBA?@@A at A@9<;:4)7985)3&&.<'-9A8&&.+&+&11'+&&4&&)-&&61
+ at 3:97260940:R:-254;None;None/1
+ACATGGTTTGAACGTATCCCTGCGGAAATCACACCTCGGAGGGTAACAATGAGC
++
+D at 639BDBEB8()/7;11?9-4&//9&822)&/3-&&&&)&))&&0&-402&)&
+ at 7:14401638:F:350;None;None/1
+TTAAAGATATGGGTAATCATTTATGGTTGCTTATTATAAGTTGATGTTGAAACA
++
+BBBCBB6::=:=<*9;1/72:137.59235(+5,,5.+5/....7111...650
+ at 13:19068477:F:246;None;None/1
+AGCACCTGTGTAACTTGGCCTCTCTAACCCTTGGTTTCTTCCCCAACCTCATTC
++
+0AEDC=CEDEDEEECEEEEEDDDEEEA<>;@A?CEAB>9ACBEDB<7%7%:+3)
+ at 3:179655047:F:206;None;None/1
+AGTTTTTACCTCCTTCTTCTAAAATAAATAAGTCTTGATAGCCAATCCCATATC
++
+<GGEACE<?E/DF at EE/EE;BCG@;CCEEBD:(-ECAGEDC7.E/FG/FB/>?:
+ at 20:15001091:F:174;None;None/1
+TGGGTGAACCAATTGAATAAAACTCTTTGCAACAATTCTTATGGGTAGAAACAG
++
+CCCCCCBBBCCCCCCCA??CCAA??BCCCCCBBBCBBB?5?>AB;@@;BACA=4
+ at 2:54967337:R:-153;None;None/1
+ATCACTCTATTCTACAAAGAAAATATACCCAGTCAGCACAGGAAAACATGCTCA
++
+?E:==B?>?BC9C?A?69CD(898=5<<5:)&642BEF7&(+4..A at F>1/-&3
+ at 18:39435195:F:202;None;None/1
+CATCATTCCTGACTCTGTATTTGTATGCTTTCAATAAGCAATCATTTTCCAAAA
++
+:?EEDADBCEEEDDAEEEDABCEEEBAEED=>CCCCDCD>ABEEE?C3?EEE:'
+ at X:68133790:F:167;None;None/1
+TGGTGTACGGCTACCTGGCTTTGATTCTACCACCTGTTCCCTGCCAGCTGTGTG
++
+CCCCCCCCCCA<@@CC?=ACBBBCC at BC?5?ACCA=1=BB;??6B>>7>>79<;
+ at 9:18241281:F:267;None;None/1
+ATAAAAACAGGAAAAATGATTAATAACATATAAAATAGTTTTCTGAAGACGGTG
++
+EEEEEEED at DCABD;DB5CEEBC at 3<DAA@>=<@8<B@<>=@9=>@BA8 at EC=(
+ at 7:141812631:F:239;None;None/1
+TCCAAGCAAATGTGTTTTCCCAGTCAGGATTAATGACCAGTACTACCCCGTACC
++
+FFFFFFEEADGCBA??BEAACAEC?B9AC?:??<?EEFB<:B:<1,))%&.9=(
+ at 15:54133744:R:-269;None;None/1
+CTTAACTGCTCGGGAATAATCCTTGAGTGAACAATAGCTTGAATCTTCCTAAGA
++
+C;A;88;//,%%0(-8(((++'(-(/%%++&%02))((0%%(./',3+'-%%,)
+ at 4:66588719:R:-245;None;None/1
+CTTTCTATTAAAACAGATAAAAAAAAAATATATATATATATATATATGAAGGAA
++
+B;A=;C;22555:7256&7+6/-&3+43474.00/8&)7A<,2/99(+;7(2<,
+ at 1:217587184:F:229;None;None/1
+GCTATGCCCAGGGTTTTGATTAATTCACTTTTTGGCCATTCTTTTTCTTTTTCT
++
+1 at EEC>>CEEA5 at ADB@<7<8<@D:<067C<(6=;=1:(795:5%378083+&%
+ at 2:119213044:R:-184;None;None/1
+AGAGTTCCAGGAAGTTTTCTGCTCTGCAGGAGACTTGGATGTCCTGGGGAAAAT
++
+CCCACC at A=;<77>>:7,74;1173.3;47(.6'''.(%)25;53(7,53,0&,
+ at 2:36811349:F:265;None;None/1
+AAGAATGAGGATAAGCTGGACATTGAAGGGTGAGGAGGGCTAATACGAATTTGT
++
+CCCCB>BBBACBA<6>9<?99,35*19<13+50,02-+/4*&-&7//-+(%6(-
+ at 1:68786835:F:349;None;None/1
+TGTTTTGTGGCCTTCCCTGTTGATACCTCAATGTACAAGTCTAGGGTCTGGAGT
++
+CEBC?EAEEEEDEE@<A;=BDBBEEC8*;B=2>9@=8<:%:?CC9*:/3>@8;9
+ at 4:126634771:F:185;None;None/1
+TGATAGGATTCTCATAGGACAATTTCTAATCTGTGGTTCTGCTTGTAAATCTCT
++
+ABCCC?=?BACCBBBCC?<B at A=@:AB>A996%45*.0-&&''%-'19'/%5,<
+ at 4:114171402:R:-296;None;None/1
+CAGTTTTCAGCTCATCTTTTGAAATGTTTTAAACCTGTACTCATCTACGTCCTT
++
+DD>2>BADDDDCB at BDDDAD9)4>??9>@?9<47'41970<557>7:@9&&+<>
+ at 9:125206364:R:-140;None;None/1
+TGAGTATCCTTCTAAGGGCACCTAGAAATTCTCAGCCTAACATCCATCACGAAC
++
+6?9:<+4=?=BDCABD7=@D>C;14@?>5;;>>68?<D<2>-<9-8A:=:<+74
+ at X:25208394:F:245;None;None/1
+CAAATGATGTGGGGAGAGACGCATCTTCTGACAGCTATTACAGTGATTTCTCGC
++
+C=CCCCACCA=A>=5:3>;?<,623*77<391+%./(,.*+*0(((+46*&&*+
+ at 2:222217158:F:227;None;None/1
+TTTTCCTGAAATCATGAGTACATGCTTGCATATCCAACCGTGGAGGGCCTGGCG
++
+BDC>DCCCBC758026=:>>*7015;,'&)0,*7-%%+5%.*'.,7))*%*%2%
+ at 1:196837088:F:230;None;None/1
+AGATTCAGATAATCTATGTTCATGCATAGGAGAATATACTCTAGAAACAGCAAG
++
+AA at B-',6A4A3;A<>>@?5>C;62;.8+%88+6,./,)8'/+,-+,+-%8%'7
+ at 7:24300836:R:-208;7,24300863,*,+1T;None/1
+GTTCATCAATCTAGCCTCTGCTACTACTCTCTCTGTTCCTGTCAAGTAGAGCTC
++
+EF?@BCDA at B>:;29A=:<?:+6CDC<?6?C8:63666A;2429'94;1?2<2/
+ at 1:76948473:F:230;None;None/1
+GGTGATAGAGTGAAGATTTTACTAAGGAAATTAACACTTAAGATTTAATGGTTT
++
+C:8 at BBEE?<8BACDBC4?A6;<10=?@93<4<<(.,0'/%-7''0''/8(1'1
+ at 8:62727783:F:135;None;None/1
+AAAATTTGAGGTGCTAAATCCAAACACACATTAAATGATAAAACTTTTAAAAAG
++
+A8ABCCA69?<ABBA?;;87=2;?=1=07.2'8,75'/1917(.1%43&7%73%
+ at 7:94677617:R:-155;None;None/1
+ATAAACAAAAACAACCAAAATATCCTTTCAAATAAAAATAGAAACCAGAAATAA
++
+)@=B=<7<9-926>>):A at DB@A2;26>7(7D<>B?9(6((1<A=6(734<@((
+ at 4:10707991:F:305;None;None/1
+CTACTCAGGAGGCTGAGACAGGAGAATCGCTTGAACCCAGGAGGCGGTGGTTGT
++
+EDCDEDDDEDEEEDBCE>EEEEEDAC at D?>B@<B?=??@@D:0;926589=45&
+ at 1:183203391:R:-141;None;None/1
+CTCTAGTAATAGTAACTACTAGGATGACAATAGTAATTTTTATTATCTTTTACT
++
+CCCCCCCCCA>ABCCCCCBB at CCBBAA@ABCCC at B@CCBA?ACCAA>@@<:@A@
+ at 15:63526307:R:-83;None;None/1
+AATCGGGAGAATATTCTGATGTGCATCTTGTCCACTACTTTCAACACAACCTTG
++
+E<7;@>*ED48&&)+&&+0&&&)/&/&/)&&&&&&)0'&&-3-0&&)&//40+-
+ at 8:79068236:F:137;None;None/1
+AGTTATGCAGTAAATCAAGCCACATATATATGAGTGATCCTTAAATGTATCCCT
++
+EED<CEEB@>CDCCECEE at 6<;DEE at 0:A80/1;00;7 at 72=04:4%306466)
+ at 9:1290219:R:-251;None;None/1
+TAATAGCTGGGGATTTTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTGTG
++
+DDBCDDA<@B?:?D?<?CEC?DEDABB@=<A at 8@ABC>BC>DDA6,3;:4-165
+ at 5:122248402:R:-212;None;None/1
+TCTTGAGCTCTCCATTCAGAGACTACATACCTTCTGCTAGAACAGGGGCTTCTC
++
+BBBED??C8>C59>>>=>:9<><E636@>==<8;===;>;;;;>?>=8745:2+
+ at X:153897575:R:-263;None;None/1
+GCTCAAGCAATTCTCATGCCTCAGCCTCCTGAGTAGCTAGGACTATGGTCACAC
++
+EA>>BEDBDC?CEDA?@C at CC;?@?>5>@8<CB::A;CC727916;588703:=
+ at 6:95453360:F:204;None;None/1
+AGTTGTGGTTTGTAATCGTCTGAAAAAAAGAGATAAAATTGAGCCTCTTTTATT
++
+:5A/?@B>BC5/2?55'+89=-=1=>5(<(/<(9<&5%0=%&(33%'((2./-+
+ at 10:6951101:R:-212;None;None/1
+TGTTAACATGATGTGGTAGAAAGTCCCCAAGATGGTCCCCAGCAATCCTCATTA
++
+ at DA=AEECEBACABEB?ADA79?EDB;AB:8><>?6?;;C:8588:8:7:61/8
+ at 13:59325230:F:98;None;None/1
+ATAAGAAACTTGTCTAAGATTATACCTATAACAAGCTGGGGTTTCAACTCCAGG
++
+CCCCBBBBAABABCAABAA>A?>ABA@?6<B;==A=9??<=?=8=<27<89:;5
+ at 3:37059536:F:184;None;None/1
+GTTAAAAATCATACTACCTGTTGATATGGTTGTATATCCGCTTTTTTCATACGA
++
+EE>BA??CAB:CC==<9>3*;A(4<@/65)3'%32'%%,%:'%6-@>0./))38
+ at 9:91156368:F:149;None;None/1
+GGGGCTGAACCCTCAAAGATTATGTTTCCCAGTTACCATTTCGTATCTTATGCC
++
+C?4>A17?<@>34?5 at A>B8'8A:>0<47;=.<<:-6.,=:9(%-,-7%'%7%(
+ at 12:55970708:R:-246;12,55970762,C,T;None/1
+AACCCAGGAGGCAGAGGTTGTGGTGAGCCAAGATCGTGCCATTGCACTCCAGCC
++
+B at B<EDEEEEEDDD@;ADEC:BCCECA>ABDDEEECEB</<B8:A<ECA336A:
+ at 3:120828442:F:203;None;None/1
+GTGCTACTTCTAGAAGGGTGTTTTCATCATCTTCTAGCCCATTGGTAGGTCCAG
++
+>BDB?>?89 at CFBB<BC@;@A887,2;7>;/<;<(%6?8'7';.57%1,71638
+ at 19:60104965:R:-147;None;None/1
+AGAGGCACCCATAACCACGACCAGCTAATTTTCGTATTTCTAATAGAGATGGTG
++
+ABB?C?CAAA+/5)8C6+9*=-CCFFC;>?B>'4&&&/21&&&)/<8+(+-&)6
+ at 11:98782460:R:-46;None;None/1
+AATGAGCATAGGCAGTAGTTAGGGAGTAGTTGCAGCAAGCCCTGGGTGAGAATC
++
+A49C<,;73<>;C?>888<):?ED>?(3A@=8A8/)-/?93)31?0&2A:''-9
+ at 12:91111930:F:209;None;None/1
+TTGGCGGGGGGATTACTAAAATCTCTTAACCTCATAAGTTTGTCAGTGTAATTT
++
+EEECDDEECECBFEEEEB?DECDACEEEEDD>28?C?ACEAEEEC>636?A;EE
+ at 3:192601514:F:196;None;None/1
+TGGGTGGAGTGTTCTATATACTCACTCTCCTTGACTTATGTTGGGGTTATGTCT
++
+EDCDEEE@;DEEEDE?C8>CADEDA8ED<?E=ABB<3:CB=7:@7 at E><4,<8.
+ at X:47989386:R:-46;None;None/1
+GTTACCCTGGCTTAATGGTCAGCATTATGTCATCGTCCCACAGCATCGCAGACA
++
+EDDDEEEEA>B@?DDEDDEEBC>/42<.318&6<>57A<2>B;275(895.9/9
+ at 2:39824446:R:-127;None;None/1
+TGAAGGAATACTGGAGGCTAGAGAATTTGTAAAGAAAAGAGGTTTACTTGGCTC
++
+DC:ACBB>7=DCDFCEFBB?A9>4;B=?56606C6-4<?8<742984<04C>DF
+ at 1:227528596:F:219;None;None/1
+ATGAACAGGGGAAATGCAGCCATTTGAATTTAATCAGAGGAACAAAAGGGAGTC
++
+ at EEEBC@DBEEEEEB:=@9C:==?AB>BB8 at 64><8==@>E?>@<707:9+70.
+ at X:4440324:R:-90;None;None/1
+AGTTTGCTACCGGATCCATAAGTCCAGCTTCTGCTCTTGTAGTAGACCAGATTC
++
+C=?;?=@>;9;?;8/9<>1?=91.796(,(702;497;7&2/&)0&-01)+./8
+ at X:73634281:F:88;None;None/1
+AATGTTTCCTTCTTCAAACCAGTGCCCGTGATTTAGCTTTTATTGACGTTGGTA
++
+BBB8B at BCCCAABCCC@BBA7@>CA<7::A5?9?:9.,.,%*.'+%-%,-%%1,
+ at 3:53909823:F:239;None;3,53910104,T,C/1
+ACATTGGTTAAAGTCAGGCACACTTTTTTTTATTAGTGTTCACTGTAATCTCCT
++
+ADDEEDCDADEEEC=CEECEEEEDDDB@;@BEEE8.,9,-,,&-275),1*)1.
+ at 3:170059370:R:-173;None;3,170059212,G,C/1
+TACAATGTTACACAGCATGCTGTTGGCACTGTTTTAAGCAAACAAGTTATGCGC
++
+E9(7BEE>)477(7;7(58))0<8*33<8)3+0&27+/718&0=7&&/&))&)1
+ at 20:8823533:F:251;None;None/1
+GTGGTGGATGTCTCAGCAGAGGCCTGCATAGAAAGACGACATTGAGGATCAGTG
++
+CEEECEA?AEEEEE;>CEA5AEEEEB5?EEEEEEEAE>EE=AD at E?B>BBD at BB
+ at 5:106802036:F:200;None;None/1
+TTTTGCTCTCAGTTGTGTCCCAGCAACCATGAGCTGGAAATCCAGAGACTCCTT
++
+EECEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEED?.<EAABCBD=@>AB?D?=
+ at 5:74632980:R:-252;None;None/1
+CTATTTGAAAATGCGTTTCAAACATGTGTGTTCTCATGTATCGAGATCATTACT
++
+DDA;;919A:5 at 9./<<?A?5'45(2&.,'9@'-&&065+&&&+/,7B9&&&21
+ at 12:84751900:F:268;None;None/1
+TAATCTTGCAAATAATTTTCACCTGTGGTCTCACCTCTGCTAAGGTGAAAAAAC
++
+EEEEEEEEEEBBBEEEC at CCCE@@BECA3DBEEA<A;A<?C<7=<?@9.::=@*
+ at X:135811790:R:-164;None;None/1
+TTATTTATTTTTTATGTGATAATTACCTTTATAAAAAATTGTTGGGCGGGCATG
++
+EEAB>E>=@2>@A:@@;A=87=A7/<<:>6&939)+.00&6&72&))0&&0/&2
+ at 10:53108829:R:-138;None;None/1
+AAATAAGGGAGGAATAACATCTAATTGTCAGAGACCCTGTCCTCCTTCTTGGAT
++
+A;CDDDDC<6<A@>DDD:B>ACDD at D>'7@=D870&+357)&6:=83/<B6&9D
+ at 11:83660431:R:-245;None;None/1
+TCTCTCTAATTATATTGTTACTTTTGGGGTAAATGTGAATAGGAAAGTGAGTTA
++
+>EHCE<CCBA?DDCBABAEDDDAHHCB at CBBDC@=BA>5>A;:<<1&7()3-//
+ at 10:119650756:F:207;None;None/1
+AGAGCACACTGTCTTCCCCTTTTGTGCTCTTAGTATGTTTGCAGCGATCAACTC
++
+=DEEEED at DEDABEEEECE@0?DEC?CDDE?EEEDDD?E>7=?<@?>1>>:<;1
+ at 2:154848222:R:-185;None;None/1
+TATATATAGTAAACAAATCCGAAATTAAAGATACAATTCCCTCAGTGTTTGGAA
++
+5CDDDEDE0*?(3(&&)28&1&)&&.&))2(&&-)2++&&',3+&-7)4+-)&&
+ at 3:63918850:F:276;None;None/1
+ACACTCCTGTGATCTTACAGCTTTCTCTCGTTGCACATATTACAAATCCTGTAA
++
+CCCCCCC>CB4>CA6*/';0::8,-0399&13.&++'16330=63%51=.3/=7
+ at 10:11447534:F:239;None;None/1
+GTTGTGGTGTAGGGTCATGCACGGATTAAGAGCTGAGTTGTGGTGGAGGGTCAT
++
+BCBBA*>B5?C5@=CCCAACCBC?<C:CC@<?ACA=?83=9AC=<;?<7<@?A?
+ at 1:57651555:R:-316;None;None/1
+AGACTTGAAAGTCAGAATTACTCCTTGATCCATGGGCTGCAGAGTAGATGTTTT
++
+BFFEFHCCC<AD7B7A?:?<BFFEFC=8<C?B at 5C<69A9CBBAD<3<6<4/<<
+ at X:91671697:R:-206;None;X,91671506,G,A/1
+ACTTCTTTGGTTCAAAGGTTTCTTTTCTTTTCATCAGAGTATTTTGAATCACTT
++
+D at BDD>EE=A8 at BDE<?D?@DA8>D at B<B>7:??E=D?>E<9884+6=835(28
+ at 12:101622421:R:-187;None;None/1
+CATCAAAAGGCTATTGTTAATTACAGTTTTTGGAGCAGAAAAATCGGAAAAGCC
++
+DEC;DD=ABEE?EA9;EC><>:7>@==A@;:2;;EC;=;5(6+))18;>;0)-.
+ at 13:43163355:R:-200;None;None/1
+GAGAACAGAGGCAAATCAAAGACTTGTTTTGGTACTTACTTATCAAAAATAAGG
++
+A:A7;8'/:17@,%./'<@@81(2/',-7:1137.0+0,-0),%)0&/-.%%%)
+ at 19:37349825:R:-160;None;None/1
+CACCATGTTGGCCAGGCTGGTCTCCAACTTCTCGGTTCAAGTGATCCATTTTCC
++
+CBACB@<B>C@@9CA686..)1'+&,,1&&&3&&&&&&)&&1)-')&&1&)&0)
+ at 8:41173199:R:-175;None;8,41173056,T,C/1
+CTCAGAAATGTTAAGGAACTTTAATGACAATTATAAACACCTTGGTCTCCGCGC
++
+ABB>BA:/=@@;@@72))&,771&%&+%2),%)380+),,%)1,,%%12/,%/)
+ at 16:29068055:R:-232;None;None/1
+CGACATACCCACAAAAAAAGAAATTTGATACACACCCAATAGGCTACTGGCCAC
++
+::>>;C2>A773,49(1(2/,'/7/%+/,.)-++%++),2'5%%%))%,(4471
+ at 1:4694340:F:179;None;None/1
+GTTAAGTGACTTGCCTGAGGTCACACAGCCTGAAGGGGGTGGAGTCAGGATTCA
++
+DDD@;?D>DB2B>DDDDCCCDA6BD35A?ADD<+ at 9BCCCCC>?<75?<<>7+9
+ at 17:76451446:F:221;None;None/1
+GCCAGAAGCTCAGAGGGATTCATGGGGAGGCCTTAAAGAAGGTGGGCTCGGGCA
++
+FF?CDCDB<@=@6CEGA;=2=AF=127 at D0EB=?B7AC:C@@>88;EEGE=C;&
+ at 1:242817485:F:158;None;None/1
+CATTTGTTTTCAAACAAAGCACGTATGTGCATTTCTTTAAAAAAGCAATTATGA
++
+DCACDD8ED>DC;C;BDD;D??9<@B=BB:.?9+=277.78/1:8=7.<15900
+ at 18:29611708:R:-146;None;18,29611585,G,A/1
+ACCACGATGCCCTACTCACACTCTGTGCTCCAGCAACAATACATTGCTTAAAAT
++
+CACCCA<BC<CB<BCBA<=AAB6BC<>77<3+5B6133073.59779<<:717<
+ at 9:36493018:F:206;None;None/1
+AGTGAGCAGAGATTGCACCATTGCACTCCAGCCTGGGCAACAAGATTGAAACTC
++
+CCEBD=CECEEE?CA:=DEECDDEEDEDB60)5<8,<>EEDCD:.+7>9;32%3
+ at 9:16274705:F:129;9,16274709,C,A:9,16274719,C,G:9,16274741,A,T;9,16274862,A,G/1
+TTTCACATGCTGTTGTAGTGATAGGGAATAAGTCTCTCGAGATCTGACGGTTTT
++
+:C>CEEEEDA?BEA<@EDBDEECDBA@?B=;:BB8:;=>:;187.2:@9'593<
diff --git a/demo/Virus_genome.fa.gz b/demo/Virus_genome.fa.gz
new file mode 100644
index 0000000..f1c03ff
Binary files /dev/null and b/demo/Virus_genome.fa.gz differ
diff --git a/demo/ref.fa b/demo/ref.fa
new file mode 100644
index 0000000..a61a192
--- /dev/null
+++ b/demo/ref.fa
@@ -0,0 +1,15 @@
+>chr1	50000	51000
+TAAACAGGTTAATCGCCACGACATAGTAGTATTTAGAGTTACTAGTAAGCCTGATGCCACTACACAATTCTAGCTTTTCTCTTTAGGATGATTGTTTCATTCAGTCTTATCTCTTTTAGAAAACATAGGAAAAAATTATTTAATAATAAAATTTAATTGGCAAAATGAAGGTATGGCTTATAAGAGTGTTTTCCTATTGTTTTCAGTGTAGGACTCACTGTTCTAAATAACTGGGACACCCAAGGATTCTGTAAAATGCCATCCAGTTATCATTTATATTCCCTAACTCAAAATTCATTCACATGTATTCATTTTTTTCTAAACAAATTAGCATGTAGAATTCTGGTTAAAATTTGGCATAGAACACCCGGGTATTTTTTCATAATGCACCCAATAACTGTCATTCACTAATTGAGAATGGTGATTTAACAAAGGATAATAAAGTTATGAAACCAATGCCACAAAACATCTGTCTCTAACTGGTGTGTGTGT [...]
+
+>chr2	50000	10050000
+ACCCAAAAGAAAATAAATCATTCTAACAAAAAGACATATGCATGCATGTAGTCATTGCAGCACTATTCATAATAACAAAGACATGGAATCAATGAAGATGCCCATCAGTGGTGGACTGGGTAAAGAAAATGTGCTACATATACACCATAGAATACTATGCAGCCATTAAAAAAGAGTGAAATCATGTCCTTTGCAGCAACATGCATTTAGCTAGAGGCCATTATCCTAAGCAAACTAATTCAGGAAGAGAAAACCAGATCCCACATATAAGTGGGAGCTAAACATTGAGTACATATAGACACAGAGAACGGAACAATAGATACTGGGGACTGCTTGATCGTGGGAGGGTGAGGAGAGGGCATAGGTTGGAAGGCTATCTGTGGGTACTATGCTCACTATCCTAGTGATGGGATCATTTGTACACCAAGCATCAGTGACCCACAATCTACCCATGTAACAAACCTGCACATGTACTCACTGAACCTAAATTTA [...]
+
+>chr3	50000	1050000
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN [...]
+
+>chr4	50000	150000
+GGACCCCGCTCTGTAATGTAAAATGTGGGGACCCCACTCTCTAATGTAAAATGTGAGCTGCCAGCTGTGGGCTGTGCCTCAGTGGTAGATGGCAGGGTTTGGGAGAGGACACAGGCCACCAGGAGAGGGCAAGCAGAATTGCTGCAGCCCAGGGCTTAGTGAGTAGGGATCCATTGCTTTGAAATATAAATAGCCGGCCGGGCGCGGTGGCTCATGCCTGTAATCCCAGCACTTTGGGAGGCCAAGGCGGGCAGATCACAAGGTCAGGAGATCGAGACCATCCTGGCTAACACAGTGAAATCCCGTCTCTACTAAAAATACAAAAAAATTAGCCAGGTGTGGTGGCGGGCACCTGTAGTCCCAGCTACTGGGGAGGCTGAGTCAGGAGAATGGCGTGAACCCGGGAGGCAGAGCTTGCAGTAAGCCAAGATCGCACCACTGCACTCCAGCCTGGGCGACAGAGCAAGACTCTGTCTCAAAAAAAGAAAAAAA [...]
+
+>chr5	50000	60000
+ATAAACCACATATTAATTGAGGTAAAATAAGTGGCCAGAGAACCCACATAATTTAGTTGCAGTAAACTTCTGCTGCATATTTAAAGGAAAATAAACGAAATAATAGTTTTCAAAACATAAAAATTATTCCACTCTTTCTGAAAACACACTGCTAATCTAAGCCTAACCAAAAAGCTAACCCTAACCCTACCACTAACCCTAATGCTACCACTAGCCTCTAACCCTACTGCTAACTCTAACACCTAACCCTAATCCCAAACCCCTAAACCAAACCCTAGTCCTGAACCCTAACCCTAACCCCAACCCAAACCCCAATACCGACACTACACTAATCATAACCCAACCCTAACTCTAACCGCAAAACCCTAACCCCTGACCCTAAACCCAACCCCAACCCAAAACCTAACTCTAACCCCTGAACCAAACCCTAAAACCCCACAAACCCCAAAAATATTCCAAACCGAACCTCACCCTAACCATAAATCCCAACCC [...]
+
diff --git a/demo/test.seq b/demo/test.seq
new file mode 100644
index 0000000..b774ae2
--- /dev/null
+++ b/demo/test.seq
@@ -0,0 +1,12 @@
+>1
+acgtacgtacgtagc
+>2 test
+acgatcgatc
+ at 3 test2
+cgctagcatagc
+cgatatgactta
++
+78wo82usd980
+d88fau
+
+238ud8
diff --git a/src/.gitignore b/src/.gitignore
new file mode 100644
index 0000000..c0117fa
--- /dev/null
+++ b/src/.gitignore
@@ -0,0 +1,12 @@
+*.*~
+example_c
+example_cpp
+libssw.so
+libsswjni.so
+ssw.jar
+ssw.o
+ssw/Aligner.class
+ssw/Alignment.class
+ssw/Example.class
+ssw_cpp.o
+ssw_test 
diff --git a/src/Makefile b/src/Makefile
new file mode 100644
index 0000000..f726378
--- /dev/null
+++ b/src/Makefile
@@ -0,0 +1,53 @@
+CC = gcc
+CXX = g++
+CFLAGS := -Wall -pipe -O3
+CXXFLAGS := $(CFLAGS)
+LOBJS = ssw.o
+LCPPOBJS = ssw_cpp.o
+PROG = ssw_test
+LIB = libssw.so
+EXAMPLE = example_c
+EXAMPLE_CPP = example_cpp
+JAVA_JAR = ssw.jar
+JAVA_LIB = libsswjni.so
+JAVA_INLCUDES = -I"$(JAVA_HOME)/include" -I"$(JAVA_HOME)/include/linux"
+JAVA_OBJ = ssw/Aligner.class ssw/Alignment.class ssw/Example.class
+
+.PHONY: all default java clean
+
+default: $(PROG) $(EXAMPLE) $(EXAMPLE_CPP) $(LIB) 
+
+all: default java
+
+java: $(JAVA_JAR) $(JAVA_LIB)
+
+$(LIB): ssw.c ssw.h
+	$(CC) $(CFLAGS) -fPIC -shared -rdynamic -o $@ $<
+
+$(PROG): main.c kseq.h
+
+$(EXAMPLE): example.c
+
+$(PROG) $(EXAMPLE): $(LOBJS)
+	$(CC) -o $@ $(filter-out %.h,$^) $(CFLAGS) -lm -lz
+
+$(EXAMPLE_CPP): example.cpp $(LOBJS) $(LCPPOBJS)
+	$(CXX) -o $@ $^ $(CXXFLAGS) -lm -lz
+
+$(JAVA_LIB): sswjni.c ssw.c ssw.h
+	$(CC) $(CFLAGS) $(JAVA_INLCUDES) -fPIC -shared -rdynamic -o $@ $< ssw.c 
+
+$(JAVA_JAR): $(JAVA_OBJ)
+	jar cvfe $@ ssw.Example $^
+
+%.class: %.java
+	javac -cp ./ $<
+	
+ssw.o: ssw.c ssw.h
+	$(CC) -c -o $@ $< $(CFLAGS)
+
+ssw_cpp.o: ssw_cpp.cpp ssw_cpp.h ssw.h
+	$(CXX) -c -o $@ $< $(CXXFLAGS)
+
+clean:
+	-rm -f $(LOBJS) $(LCPPOBJS) $(PROG) $(LIB) $(EXAMPLE) $(EXAMPLE_CPP) $(JAVA_LIB) $(JAVA_JAR) $(JAVA_OBJ) *~ 
diff --git a/src/example.c b/src/example.c
new file mode 100644
index 0000000..32d6672
--- /dev/null
+++ b/src/example.c
@@ -0,0 +1,156 @@
+/*	example.c
+ *	This is a simple example to show you how to use the SSW C library.
+ *	To run this example:
+ *	1) gcc -Wall -lz ssw.c example.c
+ *	2) ./a.out
+ *	Created by Mengyao Zhao on 07/31/12.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include "ssw.h"
+
+//	Print the BLAST like output.
+static void ssw_write (const s_align* a,
+			const char* ref_seq,
+			const char* read_seq,
+			const int8_t* table) {
+
+	fprintf(stdout, "optimal_alignment_score: %d\tsub-optimal_alignment_score: %d\t", a->score1, a->score2);
+	if (a->ref_begin1 + 1) fprintf(stdout, "target_begin: %d\t", a->ref_begin1 + 1);
+	fprintf(stdout, "target_end: %d\t", a->ref_end1 + 1);
+	if (a->read_begin1 + 1) fprintf(stdout, "query_begin: %d\t", a->read_begin1 + 1);
+	fprintf(stdout, "query_end: %d\n\n", a->read_end1 + 1);
+	if (a->cigar) {
+		int32_t c = 0, left = 0, e = 0, qb = a->ref_begin1, pb = a->read_begin1;
+		uint32_t i;
+		while (e < a->cigarLen || left > 0) {
+			int32_t count = 0;
+			int32_t q = qb;
+			int32_t p = pb;
+			fprintf(stdout, "Target: %8d    ", q + 1);
+			for (c = e; c < a->cigarLen; ++c) {
+				char letter = cigar_int_to_op(a->cigar[c]);
+				uint32_t length = cigar_int_to_len(a->cigar[c]);
+				uint32_t l = (count == 0 && left > 0) ? left: length;
+				for (i = 0; i < l; ++i) {
+					if (letter == 'I') fprintf(stdout, "-");
+					else {
+						fprintf(stdout, "%c", *(ref_seq + q));
+						++ q;
+					}
+					++ count;
+					if (count == 60) goto step2;
+				}
+			}
+step2:
+			fprintf(stdout, "    %d\n                    ", q);
+			q = qb;
+			count = 0;
+			for (c = e; c < a->cigarLen; ++c) {
+				char letter = cigar_int_to_op(a->cigar[c]);
+				uint32_t length = cigar_int_to_len(a->cigar[c]);
+				uint32_t l = (count == 0 && left > 0) ? left: length;
+				for (i = 0; i < l; ++i){
+					if (letter == 'M') {
+						if (table[(int)*(ref_seq + q)] == table[(int)*(read_seq + p)])fprintf(stdout, "|");
+						else fprintf(stdout, "*");
+						++q;
+						++p;
+					} else {
+						fprintf(stdout, "*");
+						if (letter == 'I') ++p;
+						else ++q;
+					}
+					++ count;
+					if (count == 60) {
+						qb = q;
+						goto step3;
+					}
+				}
+			}
+step3:
+			p = pb;
+			fprintf(stdout, "\nQuery:  %8d    ", p + 1);
+			count = 0;
+			for (c = e; c < a->cigarLen; ++c) {
+				char letter = cigar_int_to_op(a->cigar[c]);
+				uint32_t length = cigar_int_to_len(a->cigar[c]);
+				uint32_t l = (count == 0 && left > 0) ? left: length;
+				for (i = 0; i < l; ++i) {
+					if (letter == 'D') fprintf(stdout, "-");
+					else {
+						fprintf(stdout, "%c", *(read_seq + p));
+						++p;
+					}
+					++ count;
+					if (count == 60) {
+						pb = p;
+						left = l - i - 1;
+						e = (left == 0) ? (c + 1) : c;
+						goto end;
+					}
+				}
+			}
+			e = c;
+			left = 0;
+end:
+			fprintf(stdout, "    %d\n\n", p);
+		}
+	}
+}
+
+//	Align a pair of genome sequences.
+int main (int argc, char * const argv[]) {
+	int32_t l, m, k, match = 2, mismatch = 2, gap_open = 3, gap_extension = 1;	// default parameters for genome sequence alignment
+	// reference sequence
+	static const char ref_seq[40] = {'C', 'A', 'G', 'C', 'C', 'T', 'T', 'T', 'C', 'T', 'G', 'A', 'C', 'C', 'C', 'G', 'G', 'A', 'A', 'A', 'T',
+						'C', 'A', 'A', 'A', 'A', 'T', 'A', 'G', 'G', 'C', 'A', 'C', 'A', 'A', 'C', 'A', 'A', 'A', '\0'};
+	static const char read_seq[16] = {'C', 'T', 'G', 'A', 'G', 'C', 'C', 'G', 'G', 'T', 'A', 'A', 'A', 'T', 'C', '\0'};	// read sequence
+	s_profile* profile;
+	int8_t* num = (int8_t*)malloc(16);	// the read sequence represented in numbers
+	int8_t* ref_num = (int8_t*)malloc(64);	// the read sequence represented in numbers
+	s_align* result;
+
+	/* This table is used to transform nucleotide letters into numbers. */
+	static const int8_t nt_table[128] = {
+		4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+		4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+		4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+		4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+		4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
+		4, 4, 4, 4,  3, 0, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+		4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
+		4, 4, 4, 4,  3, 0, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
+	};
+
+	// initialize scoring matrix for genome sequences
+	//  A  C  G  T	N (or other ambiguous code)
+	//  2 -2 -2 -2 	0	A
+	// -2  2 -2 -2 	0	C
+	// -2 -2  2 -2 	0	G
+	// -2 -2 -2  2 	0	T
+	//	0  0  0  0  0	N (or other ambiguous code)
+	int8_t* mat = (int8_t*)calloc(25, sizeof(int8_t));
+	for (l = k = 0; l < 4; ++l) {
+		for (m = 0; m < 4; ++m) mat[k++] = l == m ? match : - mismatch;	/* weight_match : -weight_mismatch */
+		mat[k++] = 0; // ambiguous base: no penalty
+	}
+	for (m = 0; m < 5; ++m) mat[k++] = 0;
+
+	for (m = 0; m < 15; ++m) num[m] = nt_table[(int)read_seq[m]];
+	profile = ssw_init(num, 15, mat, 5, 2);
+	for (m = 0; m < 39; ++m) ref_num[m] = nt_table[(int)ref_seq[m]];
+
+	// Only the 8 bit of the flag is setted. ssw_align will always return the best alignment beginning position and cigar.
+	result = ssw_align (profile, ref_num, 39, gap_open, gap_extension, 1, 0, 0, 15);
+	ssw_write(result, ref_seq, read_seq, nt_table);
+
+	align_destroy(result);
+	init_destroy(profile);
+	free(mat);
+	free(ref_num);
+	free(num);
+	return(0);
+}
diff --git a/src/example.cpp b/src/example.cpp
new file mode 100644
index 0000000..87181d7
--- /dev/null
+++ b/src/example.cpp
@@ -0,0 +1,53 @@
+// ==========================
+// example.cpp
+// This is a simple example to show you how to use the SSW C++ library.
+// To run this example:
+// 1) g++ -Wall ssw_cpp.cpp ssw.c example.cpp
+// 2) ./a.out
+// Created by Wan-Ping Lee on 09/04/12.
+// ==========================
+
+#include <iostream>
+#include <string>
+
+#include "ssw_cpp.h"
+
+using std::string;
+using std::cout;
+using std::endl;
+
+static void PrintAlignment(const StripedSmithWaterman::Alignment& alignment);
+
+int main() {
+  const string ref   = "CAGCCTTTCTGACCCGGAAATCAAAATAGGCACAACAAA";
+  const string query = "CTGAGCCGGTAAATC";
+  //const string ref   = "CCGTTTATCGCA";
+  //const string query = "CCTTTTATCGCA";
+
+  // Declares a default Aligner
+  StripedSmithWaterman::Aligner aligner;
+  // Declares a default filter
+  StripedSmithWaterman::Filter filter;
+  // Declares an alignment that stores the result
+  StripedSmithWaterman::Alignment alignment;
+  // Aligns the query to the ref
+  aligner.Align(query.c_str(), ref.c_str(), ref.size(), filter, &alignment);
+
+  PrintAlignment(alignment);
+
+  return 0;
+}
+
+static void PrintAlignment(const StripedSmithWaterman::Alignment& alignment){
+  cout << "===== SSW result =====" << endl;
+  cout << "Best Smith-Waterman score:\t" << alignment.sw_score << endl
+       << "Next-best Smith-Waterman score:\t" << alignment.sw_score_next_best << endl
+       << "Reference start:\t" << alignment.ref_begin << endl
+       << "Reference end:\t" << alignment.ref_end << endl
+       << "Query start:\t" << alignment.query_begin << endl
+       << "Query end:\t" << alignment.query_end << endl
+       << "Next-best reference end:\t" << alignment.ref_end_next_best << endl
+       << "Number of mismatches:\t" << alignment.mismatches << endl
+       << "Cigar: " << alignment.cigar_string << endl;
+  cout << "======================" << endl;
+}
diff --git a/src/kseq.h b/src/kseq.h
new file mode 100644
index 0000000..bbe0125
--- /dev/null
+++ b/src/kseq.h
@@ -0,0 +1,223 @@
+/* The MIT License
+
+   Copyright (c) 2008 Genome Research Ltd (GRL).
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* Contact: Heng Li <lh3 at sanger.ac.uk> */
+
+/* Last Modified: 12APR2009 */
+
+#ifndef AC_KSEQ_H
+#define AC_KSEQ_H
+
+#include <ctype.h>
+#include <string.h>
+#include <stdlib.h>
+
+#define KS_SEP_SPACE 0 // isspace(): \t, \n, \v, \f, \r
+#define KS_SEP_TAB   1 // isspace() && !' '
+#define KS_SEP_MAX   1
+
+#define __KS_TYPE(type_t)						\
+	typedef struct __kstream_t {				\
+		char *buf;								\
+		int begin, end, is_eof;					\
+		type_t f;								\
+	} kstream_t;
+
+#define ks_eof(ks) ((ks)->is_eof && (ks)->begin >= (ks)->end)
+#define ks_rewind(ks) ((ks)->is_eof = (ks)->begin = (ks)->end = 0)
+
+#define __KS_BASIC(type_t, __bufsize)								\
+	static inline kstream_t *ks_init(type_t f)						\
+	{																\
+		kstream_t *ks = (kstream_t*)calloc(1, sizeof(kstream_t));	\
+		ks->f = f;													\
+		ks->buf = (char*)malloc(__bufsize);							\
+		return ks;													\
+	}																\
+	static inline void ks_destroy(kstream_t *ks)					\
+	{																\
+		if (ks) {													\
+			free(ks->buf);											\
+			free(ks);												\
+		}															\
+	}
+
+#define __KS_GETC(__read, __bufsize)						\
+	static inline int ks_getc(kstream_t *ks)				\
+	{														\
+		if (ks->is_eof && ks->begin >= ks->end) return -1;	\
+		if (ks->begin >= ks->end) {							\
+			ks->begin = 0;									\
+			ks->end = __read(ks->f, ks->buf, __bufsize);	\
+			if (ks->end < __bufsize) ks->is_eof = 1;		\
+			if (ks->end == 0) return -1;					\
+		}													\
+		return (int)ks->buf[ks->begin++];					\
+	}
+
+#ifndef KSTRING_T
+#define KSTRING_T kstring_t
+typedef struct __kstring_t {
+	size_t l, m;
+	char *s;
+} kstring_t;
+#endif
+
+#ifndef kroundup32
+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+#endif
+
+#define __KS_GETUNTIL(__read, __bufsize)								\
+	static int ks_getuntil(kstream_t *ks, int delimiter, kstring_t *str, int *dret) \
+	{																	\
+		if (dret) *dret = 0;											\
+		str->l = 0;														\
+		if (ks->begin >= ks->end && ks->is_eof) return -1;				\
+		for (;;) {														\
+			int i;														\
+			if (ks->begin >= ks->end) {									\
+				if (!ks->is_eof) {										\
+					ks->begin = 0;										\
+					ks->end = __read(ks->f, ks->buf, __bufsize);		\
+					if (ks->end < __bufsize) ks->is_eof = 1;			\
+					if (ks->end == 0) break;							\
+				} else break;											\
+			}															\
+			if (delimiter > KS_SEP_MAX) {								\
+				for (i = ks->begin; i < ks->end; ++i)					\
+					if (ks->buf[i] == delimiter) break;					\
+			} else if (delimiter == KS_SEP_SPACE) {						\
+				for (i = ks->begin; i < ks->end; ++i)					\
+					if (isspace(ks->buf[i])) break;						\
+			} else if (delimiter == KS_SEP_TAB) {						\
+				for (i = ks->begin; i < ks->end; ++i)					\
+					if (isspace(ks->buf[i]) && ks->buf[i] != ' ') break; \
+			} else i = 0; /* never come to here! */						\
+			if (str->m - str->l < i - ks->begin + 1) {					\
+				str->m = str->l + (i - ks->begin) + 1;					\
+				kroundup32(str->m);										\
+				str->s = (char*)realloc(str->s, str->m);				\
+			}															\
+			memcpy(str->s + str->l, ks->buf + ks->begin, i - ks->begin); \
+			str->l = str->l + (i - ks->begin);							\
+			ks->begin = i + 1;											\
+			if (i < ks->end) {											\
+				if (dret) *dret = ks->buf[i];							\
+				break;													\
+			}															\
+		}																\
+		if (str->l == 0) {												\
+			str->m = 1;													\
+			str->s = (char*)calloc(1, 1);								\
+		}																\
+		str->s[str->l] = '\0';											\
+		return str->l;													\
+	}
+
+#define KSTREAM_INIT(type_t, __read, __bufsize) \
+	__KS_TYPE(type_t)							\
+	__KS_BASIC(type_t, __bufsize)				\
+	__KS_GETC(__read, __bufsize)				\
+	__KS_GETUNTIL(__read, __bufsize)
+
+#define __KSEQ_BASIC(type_t)											\
+	static inline kseq_t *kseq_init(type_t fd)							\
+	{																	\
+		kseq_t *s = (kseq_t*)calloc(1, sizeof(kseq_t));					\
+		s->f = ks_init(fd);												\
+		return s;														\
+	}																	\
+	static inline void kseq_rewind(kseq_t *ks)							\
+	{																	\
+		ks->last_char = 0;												\
+		ks->f->is_eof = ks->f->begin = ks->f->end = 0;					\
+	}																	\
+	static inline void kseq_destroy(kseq_t *ks)							\
+	{																	\
+		if (!ks) return;												\
+		free(ks->name.s); free(ks->comment.s); free(ks->seq.s);	free(ks->qual.s); \
+		ks_destroy(ks->f);												\
+		free(ks);														\
+	}
+
+/* Return value:
+   >=0  length of the sequence (normal)
+   -1   end-of-file
+   -2   truncated quality string
+ */
+#define __KSEQ_READ														\
+	static int kseq_read(kseq_t *seq)									\
+	{																	\
+		int c;															\
+		kstream_t *ks = seq->f;											\
+		if (seq->last_char == 0) { /* then jump to the next header line */ \
+			while ((c = ks_getc(ks)) != -1 && c != '>' && c != '@');	\
+			if (c == -1) return -1; /* end of file */					\
+			seq->last_char = c;											\
+		} /* the first header char has been read */						\
+		seq->comment.l = seq->seq.l = seq->qual.l = 0;					\
+		if (ks_getuntil(ks, 0, &seq->name, &c) < 0) return -1;			\
+		if (c != '\n') ks_getuntil(ks, '\n', &seq->comment, 0);			\
+		while ((c = ks_getc(ks)) != -1 && c != '>' && c != '+' && c != '@') { \
+			if (isgraph(c)) { /* printable non-space character */		\
+				if (seq->seq.l + 1 >= seq->seq.m) { /* double the memory */ \
+					seq->seq.m = seq->seq.l + 2;						\
+					kroundup32(seq->seq.m); /* rounded to next closest 2^k */ \
+					seq->seq.s = (char*)realloc(seq->seq.s, seq->seq.m); \
+				}														\
+				seq->seq.s[seq->seq.l++] = (char)c;						\
+			}															\
+		}																\
+		if (c == '>' || c == '@') seq->last_char = c; /* the first header char has been read */	\
+		seq->seq.s[seq->seq.l] = 0;	/* null terminated string */		\
+		if (c != '+') return seq->seq.l; /* FASTA */					\
+		if (seq->qual.m < seq->seq.m) {	/* allocate enough memory */	\
+			seq->qual.m = seq->seq.m;									\
+			seq->qual.s = (char*)realloc(seq->qual.s, seq->qual.m);		\
+		}																\
+		while ((c = ks_getc(ks)) != -1 && c != '\n'); /* skip the rest of '+' line */ \
+		if (c == -1) return -2; /* we should not stop here */			\
+		while ((c = ks_getc(ks)) != -1 && seq->qual.l < seq->seq.l)		\
+			if (c >= 33 && c <= 127) seq->qual.s[seq->qual.l++] = (unsigned char)c;	\
+		seq->qual.s[seq->qual.l] = 0; /* null terminated string */		\
+		seq->last_char = 0;	/* we have not come to the next header line */ \
+		if (seq->seq.l != seq->qual.l) return -2; /* qual string is shorter than seq string */ \
+		return seq->seq.l;												\
+	}
+
+#define __KSEQ_TYPE(type_t)						\
+	typedef struct {							\
+		kstring_t name, comment, seq, qual;		\
+		int last_char;							\
+		kstream_t *f;							\
+	} kseq_t;
+
+#define KSEQ_INIT(type_t, __read)				\
+	KSTREAM_INIT(type_t, __read, 4096)			\
+	__KSEQ_TYPE(type_t)							\
+	__KSEQ_BASIC(type_t)						\
+	__KSEQ_READ
+
+#endif
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..a394be4
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,465 @@
+/*  main.c
+ *  Created by Mengyao Zhao on 06/23/11.
+ *	Version 0.1.5
+ *  Last revision by Mengyao Zhao on 02/23/16.
+ */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <emmintrin.h>
+#include <zlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <string.h>
+#include <math.h>
+#include <unistd.h>
+#include "ssw.h"
+#include "kseq.h"
+
+#ifdef __GNUC__
+#define LIKELY(x) __builtin_expect((x),1)
+#define UNLIKELY(x) __builtin_expect((x),0)
+#else
+#define LIKELY(x) (x)
+#define UNLIKELY(x) (x)
+#endif
+
+/*! @function
+  @abstract  Round an integer to the next closest power-2 integer.
+  @param  x  integer to be rounded (in place)
+  @discussion x will be modified.
+ */
+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+
+KSEQ_INIT(gzFile, gzread)
+
+static void reverse_comple(const char* seq, char* rc) {
+	int32_t end = strlen(seq), start = 0;
+	static const int8_t rc_table[128] = {
+		4, 4,  4, 4,  4,  4,  4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+		4, 4,  4, 4,  4,  4,  4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+		4, 4,  4, 4,  4,  4,  4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+		4, 4,  4, 4,  4,  4,  4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+		4, 84, 4, 71, 4,  4,  4, 67, 4, 4, 4, 4,  4, 4, 4, 4,
+		4, 4,  4, 4,  65, 65, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+		4, 84, 4, 71, 4,  4,  4, 67, 4, 4, 4, 4,  4, 4, 4, 4,
+		4, 4,  4, 4,  65, 65, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
+	};
+	rc[end] = '\0';
+	-- end;
+	while (LIKELY(start < end)) {
+		rc[start] = (char)rc_table[(int8_t)seq[end]];
+		rc[end] = (char)rc_table[(int8_t)seq[start]];
+		++ start;
+		-- end;
+	}
+	if (start == end) rc[start] = (char)rc_table[(int8_t)seq[start]];
+}
+
+static void ssw_write (const s_align* a,
+			const kseq_t* ref_seq,
+			const kseq_t* read,
+			const char* read_seq,	// strand == 0: original read; strand == 1: reverse complement read
+			const int8_t* table,
+			int8_t strand,	// 0: forward aligned ; 1: reverse complement aligned
+			int8_t sam) {	// 0: Blast like output; 1: Sam format output
+
+	if (sam == 0) {	// Blast like output
+		fprintf(stdout, "target_name: %s\nquery_name: %s\noptimal_alignment_score: %d\t", ref_seq->name.s, read->name.s, a->score1);
+		if (a->score2 > 0) fprintf(stdout, "suboptimal_alignment_score: %d\t", a->score2);
+		if (strand == 0) fprintf(stdout, "strand: +\t");
+		else fprintf(stdout, "strand: -\t");
+		if (a->ref_begin1 + 1) fprintf(stdout, "target_begin: %d\t", a->ref_begin1 + 1);
+		fprintf(stdout, "target_end: %d\t", a->ref_end1 + 1);
+		if (a->read_begin1 + 1) fprintf(stdout, "query_begin: %d\t", a->read_begin1 + 1);
+		fprintf(stdout, "query_end: %d\n\n", a->read_end1 + 1);
+		if (a->cigar) {
+			int32_t c = 0, left = 0, e = 0, qb = a->ref_begin1, pb = a->read_begin1;
+			uint32_t i;
+			while (e < a->cigarLen || left > 0) {
+				int32_t count = 0;
+				int32_t q = qb;
+				int32_t p = pb;
+				fprintf(stdout, "Target: %8d    ", q + 1);
+				for (c = e; c < a->cigarLen; ++c) {
+					char letter = cigar_int_to_op(a->cigar[c]);
+					uint32_t length = cigar_int_to_len(a->cigar[c]);
+					uint32_t l = (count == 0 && left > 0) ? left: length;
+					for (i = 0; i < l; ++i) {
+						if (letter == 'I') fprintf(stdout, "-");
+						else {
+							fprintf(stdout, "%c", *(ref_seq->seq.s + q));
+							++ q;
+						}
+						++ count;
+						if (count == 60) goto step2;
+					}
+				}
+step2:
+				fprintf(stdout, "    %d\n                    ", q);
+				q = qb;
+				count = 0;
+				for (c = e; c < a->cigarLen; ++c) {
+					char letter = cigar_int_to_op(a->cigar[c]);
+					uint32_t length = cigar_int_to_len(a->cigar[c]);
+					uint32_t l = (count == 0 && left > 0) ? left: length;
+					for (i = 0; i < l; ++i){
+						if (letter == 'M') {
+							if (table[(int)*(ref_seq->seq.s + q)] == table[(int)*(read_seq + p)])fprintf(stdout, "|");
+							else fprintf(stdout, "*");
+							++q;
+							++p;
+						} else {
+							fprintf(stdout, " ");
+							if (letter == 'I') ++p;
+							else ++q;
+						}
+						++ count;
+						if (count == 60) {
+							qb = q;
+							goto step3;
+						}
+					}
+				}
+step3:
+				p = pb;
+				fprintf(stdout, "\nQuery:  %8d    ", p + 1);
+				count = 0;
+				for (c = e; c < a->cigarLen; ++c) {
+					char letter = cigar_int_to_op(a->cigar[c]);
+					uint32_t length = cigar_int_to_len(a->cigar[c]);
+					uint32_t l = (count == 0 && left > 0) ? left: length;
+					for (i = 0; i < l; ++i) {
+						if (letter == 'D') fprintf(stdout, "-");
+						else {
+							fprintf(stdout, "%c", *(read_seq + p));
+							++p;
+						}
+						++ count;
+						if (count == 60) {
+							pb = p;
+							left = l - i - 1;
+							e = (left == 0) ? (c + 1) : c;
+							goto end;
+						}
+					}
+				}
+				e = c;
+				left = 0;
+end:
+				fprintf(stdout, "    %d\n\n", p);
+			}
+		}
+	}else {	// Sam format output
+		fprintf(stdout, "%s\t", read->name.s);
+		if (a->score1 == 0) fprintf(stdout, "4\t*\t0\t255\t*\t*\t0\t0\t*\t*\n");
+		else {
+			int32_t c, l = a->read_end1 - a->read_begin1 + 1, qb = a->ref_begin1, pb = a->read_begin1, p;
+			uint32_t mapq = -4.343 * log(1 - (double)abs(a->score1 - a->score2)/(double)a->score1);
+			mapq = (uint32_t) (mapq + 4.99);
+			mapq = mapq < 254 ? mapq : 254;
+			if (strand) fprintf(stdout, "16\t");
+			else fprintf(stdout, "0\t");
+			fprintf(stdout, "%s\t%d\t%d\t", ref_seq->name.s, a->ref_begin1 + 1, mapq);
+			for (c = 0; c < a->cigarLen; ++c) {
+				char letter = cigar_int_to_op(a->cigar[c]);
+				uint32_t length = cigar_int_to_len(a->cigar[c]);
+				fprintf(stdout, "%lu%c", (unsigned long)length, letter);
+			}
+			fprintf(stdout, "\t*\t0\t0\t");
+			for (c = a->read_begin1; c <= a->read_end1; ++c) fprintf(stdout, "%c", read_seq[c]);
+			fprintf(stdout, "\t");
+			if (read->qual.s && strand) {
+				p = a->read_end1;
+				for (c = 0; c < l; ++c) {
+					fprintf(stdout, "%c", read->qual.s[p]);
+					--p;
+				}
+			}else if (read->qual.s){
+				p = a->read_begin1;
+				for (c = 0; c < l; ++c) {
+					fprintf(stdout, "%c", read->qual.s[p]);
+					++p;
+				}
+			} else fprintf(stdout, "*");
+			fprintf(stdout, "\tAS:i:%d", a->score1);
+			mapq = 0;	// counter of difference
+			for (c = 0; c < a->cigarLen; ++c) {
+				char letter = cigar_int_to_op(a->cigar[c]);
+				uint32_t length = cigar_int_to_len(a->cigar[c]);
+				if (letter == 'M') {
+					for (p = 0; p < length; ++p){
+						if (table[(int)*(ref_seq->seq.s + qb)] != table[(int)*(read_seq + pb)]) ++mapq;
+						++qb;
+						++pb;
+					}
+				} else if (letter == 'I') {
+					pb += length;
+					mapq += length;
+				} else {
+					qb += length;
+					mapq += length;
+				}
+			}
+			fprintf(stdout,"\tNM:i:%d\t", mapq);
+			if (a->score2 > 0) fprintf(stdout, "ZS:i:%d\n", a->score2);
+			else fprintf(stdout, "\n");
+		}
+	}
+}
+
+int main (int argc, char * const argv[]) {
+	clock_t start, end;
+	float cpu_time;
+	gzFile read_fp, ref_fp;
+	kseq_t *read_seq, *ref_seq;
+	int32_t l, m, k, match = 2, mismatch = 2, gap_open = 3, gap_extension = 1, path = 0, reverse = 0, n = 5, sam = 0, protein = 0, header = 0, s1 = 67108864, s2 = 128, filter = 0;
+	int8_t* mata = (int8_t*)calloc(25, sizeof(int8_t));
+	const int8_t* mat = mata;
+	char mat_name[16];
+	mat_name[0] = '\0';
+	int8_t* ref_num = (int8_t*)malloc(s1);
+	int8_t* num = (int8_t*)malloc(s2), *num_rc = 0;
+	char* read_rc = 0;
+
+	static const int8_t mat50[] = {
+	//  A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V   B   Z   X   *
+     	5, -2, -1, -2, -1, -1, -1,  0, -2, -1, -2, -1, -1, -3, -1,  1,  0, -3, -2,  0, -2, -1, -1, -5,	// A
+       -2,  7, -1, -2, -4,  1,  0, -3,  0, -4, -3,  3, -2, -3, -3, -1, -1, -3, -1, -3, -1,  0, -1, -5,	// R
+       -1, -1,  7,  2, -2,  0,  0,  0,  1, -3, -4,  0, -2, -4, -2,  1,  0, -4, -2, -3,  5,  0, -1, -5,	// N
+       -2, -2,  2,  8, -4,  0,  2, -1, -1, -4, -4, -1, -4, -5, -1,  0, -1, -5, -3, -4,  6,  1, -1, -5,	// D
+       -1, -4, -2, -4, 13, -3, -3, -3, -3, -2, -2, -3, -2, -2, -4, -1, -1, -5, -3, -1, -3, -3, -1, -5,	// C
+       -1,  1,  0,  0, -3,  7,  2, -2,  1, -3, -2,  2,  0, -4, -1,  0, -1, -1, -1, -3,  0,  4, -1, -5,	// Q
+       -1,  0,  0,  2, -3,  2,  6, -3,  0, -4, -3,  1, -2, -3, -1, -1, -1, -3, -2, -3,  1,  5, -1, -5,	// E
+     	0, -3,  0, -1, -3, -2, -3,  8, -2, -4, -4, -2, -3, -4, -2,  0, -2, -3, -3, -4, -1, -2, -1, -5,	// G
+       -2,  0,  1, -1, -3,  1,  0, -2, 10, -4, -3,  0, -1, -1, -2, -1, -2, -3,  2, -4,  0,  0, -1, -5,	// H
+       -1, -4, -3, -4, -2, -3, -4, -4, -4,  5,  2, -3,  2,  0, -3, -3, -1, -3, -1,  4, -4, -3, -1, -5,	// I
+       -2, -3, -4, -4, -2, -2, -3, -4, -3,  2,  5, -3,  3,  1, -4, -3, -1, -2, -1,  1, -4, -3, -1, -5,	// L
+       -1,  3,  0, -1, -3,  2,  1, -2,  0, -3, -3,  6, -2, -4, -1,  0, -1, -3, -2, -3,  0,  1, -1, -5,	// K
+       -1, -2, -2, -4, -2,  0, -2, -3, -1,  2,  3, -2,  7,  0, -3, -2, -1, -1,  0,  1, -3, -1, -1, -5,	// M
+       -3, -3, -4, -5, -2, -4, -3, -4, -1,  0,  1, -4,  0,  8, -4, -3, -2,  1,  4, -1, -4, -4, -1, -5,	// F
+       -1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -4, -1, -3, -4, 10, -1, -1, -4, -3, -3, -2, -1, -1, -5,	// P
+     	1, -1,  1,  0, -1,  0, -1,  0, -1, -3, -3,  0, -2, -3, -1,  5,  2, -4, -2, -2,  0,  0, -1, -5,	// S
+    	0, -1,  0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1,  2,  5, -3, -2,  0,  0, -1, -1, -5, 	// T
+       -3, -3, -4, -5, -5, -1, -3, -3, -3, -3, -2, -3, -1,  1, -4, -4, -3, 15,  2, -3, -5, -2, -1, -5, 	// W
+       -2, -1, -2, -3, -3, -1, -2, -3,  2, -1, -1, -2,  0,  4, -3, -2, -2,  2,  8, -1, -3, -2, -1, -5, 	// Y
+     	0, -3, -3, -4, -1, -3, -3, -4, -4,  4,  1, -3,  1, -1, -3, -2,  0, -3, -1,  5, -3, -3, -1, -5, 	// V
+       -2, -1,  5,  6, -3,  0,  1, -1,  0, -4, -4,  0, -3, -4, -2,  0,  0, -5, -3, -3,  6,  1, -1, -5, 	// B
+       -1,  0,  0,  1, -3,  4,  5, -2,  0, -3, -3,  1, -1, -4, -1,  0, -1, -2, -2, -3,  1,  5, -1, -5, 	// Z
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -5, 	// X
+       -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,  1 	// *
+	};
+
+	/* This table is used to transform amino acid letters into numbers. */
+	int8_t aa_table[128] = {
+		23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+		23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+		23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+		23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
+		23, 0,  20, 4,  3,  6,  13, 7,  8,  9,  23, 11, 10, 12, 2,  23,
+		14, 5,  1,  15, 16, 23, 19, 17, 22, 18, 21, 23, 23, 23, 23, 23,
+		23, 0,  20, 4,  3,  6,  13, 7,  8,  9,  23, 11, 10, 12, 2,  23,
+		14, 5,  1,  15, 16, 23, 19, 17, 22, 18, 21, 23, 23, 23, 23, 23
+	};
+
+	/* This table is used to transform nucleotide letters into numbers. */
+	int8_t nt_table[128] = {
+		4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+		4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+		4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+		4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+		4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
+		4, 4, 4, 4,  3, 3, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+		4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
+		4, 4, 4, 4,  3, 3, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
+	};
+
+	int8_t* table = nt_table;
+
+	// Parse command line.
+	while ((l = getopt(argc, argv, "m:x:o:e:a:f:pcrsh")) >= 0) {
+		switch (l) {
+			case 'm': match = atoi(optarg); break;
+			case 'x': mismatch = atoi(optarg); break;
+			case 'o': gap_open = atoi(optarg); break;
+			case 'e': gap_extension = atoi(optarg); break;
+			case 'a': strcpy(mat_name, optarg); break;
+			case 'f': filter = atoi(optarg); break;
+			case 'p': protein = 1; break;
+			case 'c': path = 1; break;
+			case 'r': reverse = 1; break;
+			case 's': sam = 1; break;
+			case 'h': header = 1; break;
+		}
+	}
+	if (optind + 2 > argc) {
+		fprintf(stderr, "\n");
+		fprintf(stderr, "Usage: ssw_test [options] ... <target.fasta> <query.fasta>(or <query.fastq>)\n");
+		fprintf(stderr, "Options:\n");
+		fprintf(stderr, "\t-m N\tN is a positive integer for weight match in genome sequence alignment. [default: 2]\n");
+		fprintf(stderr, "\t-x N\tN is a positive integer. -N will be used as weight mismatch in genome sequence alignment. [default: 2]\n");
+		fprintf(stderr, "\t-o N\tN is a positive integer. -N will be used as the weight for the gap opening. [default: 3]\n");
+		fprintf(stderr, "\t-e N\tN is a positive integer. -N will be used as the weight for the gap extension. [default: 1]\n");
+		fprintf(stderr, "\t-p\tDo protein sequence alignment. Without this option, the ssw_test will do genome sequence alignment.\n");
+		fprintf(stderr, "\t-a FILE\tFILE is either the Blosum or Pam weight matrix. [default: Blosum50]\n");
+		fprintf(stderr, "\t-c\tReturn the alignment path.\n");
+		fprintf(stderr, "\t-f N\tN is a positive integer. Only output the alignments with the Smith-Waterman score >= N.\n");
+		fprintf(stderr, "\t-r\tThe best alignment will be picked between the original read alignment and the reverse complement read alignment.\n");
+		fprintf(stderr, "\t-s\tOutput in SAM format. [default: no header]\n");
+		fprintf(stderr, "\t-h\tIf -s is used, include header in SAM output.\n\n");
+		return 1;
+	}
+
+	// initialize scoring matrix for genome sequences
+	for (l = k = 0; LIKELY(l < 4); ++l) {
+		for (m = 0; LIKELY(m < 4); ++m) mata[k++] = l == m ? match : -mismatch;	/* weight_match : -weight_mismatch */
+		mata[k++] = 0; // ambiguous base
+	}
+	for (m = 0; LIKELY(m < 5); ++m) mata[k++] = 0;
+
+	if (protein == 1 && (! strcmp(mat_name, "\0"))) {
+		n = 24;
+		table = aa_table;
+		mat = mat50;
+	} else if (strcmp(mat_name, "\0")) {
+
+	// Parse score matrix.
+		FILE *f_mat = fopen(mat_name, "r");
+		char line[128];
+		mata = (int8_t*)realloc(mata, 1024 * sizeof(int8_t));
+		k = 0;
+		m = 0;
+		while (fgets(line, 128, f_mat)) {
+			if (line[0] == '*' || (line[0] >= 'A' && line[0] <= 'Z')) {
+				if (line[0] >= 'A' && line[0] <= 'Z') aa_table[(int)line[0]] = aa_table[(int)line[0] + 32] = m;
+				char str[4], *s = str;
+				str[0] = '\0';
+				l = 1;
+				while (line[l]) {
+					if ((line[l] >= '0' && line[l] <= '9') || line[l] == '-') *s++ = line[l];
+					else if (str[0] != '\0') {
+						*s = '\0';
+						mata[k++] = (int8_t)atoi(str);
+						s = str;
+						str[0] = '\0';
+					}
+					++l;
+				}
+				if (str[0] != '\0') {
+					*s = '\0';
+					mata[k++] = (int8_t)atoi(str);
+					s = str;
+					str[0] = '\0';
+				}
+				++m;
+			}
+		}
+		if (k == 0) {
+			fprintf(stderr, "Problem of reading the weight matrix file.\n");
+			return 1;
+		}
+		fclose(f_mat);
+		n = m;
+		table = aa_table;
+		mat = mata;
+	}
+
+	fprintf(stderr, "query: %s\n", argv[optind + 1]);
+	read_fp = gzopen(argv[optind + 1], "r");
+
+    if (! read_fp) {
+        fprintf (stderr, "gzopen of '%s' failed.\n", argv[optind + 1]);
+            exit (EXIT_FAILURE);
+    }
+
+	read_seq = kseq_init(read_fp);
+	if (sam && header && path) {
+		fprintf(stdout, "@HD\tVN:1.4\tSO:queryname\n");
+		ref_fp = gzopen(argv[optind], "r");
+		ref_seq = kseq_init(ref_fp);
+		while ((l = kseq_read(ref_seq)) >= 0) fprintf(stdout, "@SQ\tSN:%s\tLN:%d\n", ref_seq->name.s, (int32_t)ref_seq->seq.l);
+		kseq_destroy(ref_seq);
+		gzclose(ref_fp);
+	} else if (sam && !path) {
+		fprintf(stderr, "SAM format output is only available together with option -c.\n");
+		sam = 0;
+	}
+
+	// alignment
+	if (reverse == 1 && n == 5) {
+		read_rc = (char*)malloc(s2);
+		num_rc = (int8_t*)malloc(s2);
+	}
+	start = clock();
+	while (kseq_read(read_seq) >= 0) {
+		s_profile* p, *p_rc = 0;
+		int32_t readLen = read_seq->seq.l;
+		int32_t maskLen = readLen / 2;
+
+		while (readLen >= s2) {
+			++s2;
+			kroundup32(s2);
+			num = (int8_t*)realloc(num, s2);
+			if (reverse == 1 && n == 5) {
+				read_rc = (char*)realloc(read_rc, s2);
+				num_rc = (int8_t*)realloc(num_rc, s2);
+			}
+		}
+		for (m = 0; m < readLen; ++m) num[m] = table[(int)read_seq->seq.s[m]];
+		p = ssw_init(num, readLen, mat, n, 2);
+		if (reverse == 1 && n == 5) {
+			reverse_comple(read_seq->seq.s, read_rc);
+			for (m = 0; m < readLen; ++m) num_rc[m] = table[(int)read_rc[m]];
+			p_rc = ssw_init(num_rc, readLen, mat, n, 2);
+		}else if (reverse == 1 && n == 24) {
+			fprintf (stderr, "Reverse complement alignment is not available for protein sequences. \n");
+			return 1;
+		}
+
+		ref_fp = gzopen(argv[optind], "r");
+		ref_seq = kseq_init(ref_fp);
+		while (kseq_read(ref_seq) >= 0) {
+			s_align* result, *result_rc = 0;
+			int32_t refLen = ref_seq->seq.l;
+			int8_t flag = 0;
+			while (refLen > s1) {
+				++s1;
+				kroundup32(s1);
+				ref_num = (int8_t*)realloc(ref_num, s1);
+			}
+			for (m = 0; m < refLen; ++m) ref_num[m] = table[(int)ref_seq->seq.s[m]];
+			if (path == 1) flag = 2;
+			result = ssw_align (p, ref_num, refLen, gap_open, gap_extension, flag, filter, 0, maskLen);
+			if (reverse == 1 && protein == 0)
+				result_rc = ssw_align(p_rc, ref_num, refLen, gap_open, gap_extension, flag, filter, 0, maskLen);
+			if (result_rc && result_rc->score1 > result->score1 && result_rc->score1 >= filter) {
+				if (sam) ssw_write (result_rc, ref_seq, read_seq, read_rc, table, 1, 1);
+				else ssw_write (result_rc, ref_seq, read_seq, read_rc, table, 1, 0);
+			}else if (result && result->score1 >= filter){
+				if (sam) ssw_write(result, ref_seq, read_seq, read_seq->seq.s, table, 0, 1);
+				else ssw_write(result, ref_seq, read_seq, read_seq->seq.s, table, 0, 0);
+			} else if (! result) return 1;
+			if (result_rc) align_destroy(result_rc);
+			align_destroy(result);
+		}
+
+		if(p_rc) init_destroy(p_rc);
+		init_destroy(p);
+		kseq_destroy(ref_seq);
+		gzclose(ref_fp);
+	}
+	end = clock();
+	cpu_time = ((float) (end - start)) / CLOCKS_PER_SEC;
+	fprintf(stderr, "CPU time: %f seconds\n", cpu_time);
+
+	if (num_rc) {
+		free(num_rc);
+		free(read_rc);
+	}
+	kseq_destroy(read_seq);
+	gzclose(read_fp);
+	free(num);
+	free(ref_num);
+ 	free(mata);
+	return 0;
+}
diff --git a/src/pyssw.py b/src/pyssw.py
new file mode 100755
index 0000000..89c2dc4
--- /dev/null
+++ b/src/pyssw.py
@@ -0,0 +1,384 @@
+#!/usr/bin/env python
+"""
+Simple python wrapper for SSW library
+Please put the path of libssw.so into LD_LIBRARY_PATH or pass it explicitly as a parameter
+By Yongan Zhao (March 2016)
+"""
+
+import sys
+import os.path as op
+import argparse as ap
+import ctypes as ct
+import timeit as ti
+import gzip
+import math
+
+import ssw_lib
+
+
+
+
+def read(sFile):
+    """
+    read a sequence file
+    @param  sFile   sequence file
+    """
+    def read_one_fasta(f):
+        """
+        read a fasta file
+        @param  f   file handler
+        """
+        sId = ''
+        sSeq = ''
+        for l in f:
+            if l.startswith('>'):
+                if sSeq:
+                    yield sId, sSeq, ''
+                sId = l.strip()[1:].split()[0]
+                sSeq = ''
+            else:
+                sSeq += l.strip()
+
+        yield sId, sSeq, ''
+
+    def read_one_fastq(f):
+        """
+        read a fastq file
+        @param  f   file handler
+        """
+        sId = ''
+        sSeq = ''
+        s3 = ''
+        sQual = ''
+        for l in f:
+            sId = l.strip()[1:].split()[0]
+            sSeq = f.next()
+            s3 = f.next()
+            sQual = f.next()
+
+            yield sId, sSeq, sQual
+
+# test if fasta or fastq
+    bFasta = True
+    ext = op.splitext(sFile)[1][1:].strip().lower()
+    if ext == 'gz' or ext == 'gzip':
+        with gzip.open(sFile, 'r') as f:
+            l = f.next()
+            if l.startswith('>'):
+                bFasta = True
+            elif l.startswith('@'):
+                bFasta = False
+            else:
+                print >> sys.stderr, 'file format cannot be recognized'
+                sys.exit()
+    else:
+        with open(sFile, 'r') as f:
+            l = f.next()
+            if l.startswith('>'):
+                bFasta = True
+            elif l.startswith('@'):
+                bFasta = False
+            else:
+                print >> sys.stderr, 'file format cannot be recognized'
+                sys.exit()
+
+# read
+    if ext == 'gz' or ext == 'gzip':
+        with gzip.open(sFile, 'r') as f:
+            if bFasta == True:
+                for sId,sSeq,sQual in read_one_fasta(f):
+                    yield sId, sSeq, sQual
+            else:
+                for sId,sSeq,sQual in read_one_fastq(f):
+                    yield sId, sSeq, sQual
+    else:
+        with open(sFile, 'r') as f:
+            if bFasta == True:
+                for sId,sSeq,sQual in read_one_fasta(f):
+                    yield sId, sSeq, sQual
+            else:
+                for sId,sSeq,sQual in read_one_fastq(f):
+                    yield sId, sSeq, sQual
+
+
+def to_int(seq, lEle, dEle2Int):
+    """
+    translate a sequence into numbers
+    @param  seq   a sequence
+    """
+    num_decl = len(seq) * ct.c_int8
+    num = num_decl()
+    for i,ele in enumerate(seq):
+        try:
+            n = dEle2Int[ele]
+        except KeyError:
+            n = dEle2Int[lEle[-1]]
+        finally:
+            num[i] = n
+
+    return num
+
+
+def align_one(ssw, qProfile, rNum, nRLen, nOpen, nExt, nFlag, nMaskLen):
+    """
+    align one pair of sequences
+    @param  qProfile   query profile
+    @param  rNum   number array for reference
+    @param  nRLen   length of reference sequence
+    @param  nFlag   alignment flag
+    @param  nMaskLen   mask length
+    """
+    res = ssw.ssw_align(qProfile, rNum, ct.c_int32(nRLen), nOpen, nExt, nFlag, 0, 0, nMaskLen)
+
+    nScore = res.contents.nScore
+    nScore2 = res.contents.nScore2
+    nRefBeg = res.contents.nRefBeg
+    nRefEnd = res.contents.nRefEnd
+    nQryBeg = res.contents.nQryBeg
+    nQryEnd = res.contents.nQryEnd
+    nRefEnd2 = res.contents.nRefEnd2
+    lCigar = [res.contents.sCigar[idx] for idx in range(res.contents.nCigarLen)]
+    nCigarLen = res.contents.nCigarLen
+    ssw.align_destroy(res)
+
+    return (nScore, nScore2, nRefBeg, nRefEnd, nQryBeg, nQryEnd, nRefEnd2, nCigarLen, lCigar)
+
+
+def buildPath(q, r, nQryBeg, nRefBeg, lCigar):
+    """
+    build cigar string and align path based on cigar array returned by ssw_align
+    @param  q   query sequence
+    @param  r   reference sequence
+    @param  nQryBeg   begin position of query sequence
+    @param  nRefBeg   begin position of reference sequence
+    @param  lCigar   cigar array
+    """
+    sCigarInfo = 'MIDNSHP=X'
+    sCigar = ''
+    sQ = ''
+    sA = ''
+    sR = ''
+    nQOff = nQryBeg
+    nROff = nRefBeg
+    for x in lCigar:
+        n = x >> 4
+        m = x & 15
+        if m > 8:
+            c = 'M'
+        else:
+            c = sCigarInfo[m]
+        sCigar += str(n) + c
+
+        if c == 'M':
+            sQ += q[nQOff : nQOff+n]
+            sA += ''.join(['|' if q[nQOff+j] == r[nROff+j] else '*' for j in xrange(n)])
+            sR += r[nROff : nROff+n]
+            nQOff += n
+            nROff += n
+        elif c == 'I':
+            sQ += q[nQOff : nQOff+n]
+            sA += ' ' * n
+            sR += '-' * n
+            nQOff += n
+        elif c == 'D':
+            sQ += '-' * n
+            sA += ' ' * n
+            sR += r[nROff : nROff+n]
+            nROff += n
+
+    return sCigar, sQ, sA, sR
+
+
+
+
+def main(args):
+    lEle = []
+    dRc = {} 
+    dEle2Int = {}
+    dInt2Ele = {}
+    if False == args.bProtien:
+# init DNA score matrix
+        if not args.sMatrix:
+            lEle = ['A', 'C', 'G', 'T', 'N']
+            dRc = {'A':'C', 'C':'G', 'G':'C', 'T':'A', 'a':'C', 'c':'G', 'g':'C', 't':'A'} 
+            for i,ele in enumerate(lEle):
+                dEle2Int[ele] = i
+                dEle2Int[ele.lower()] = i
+                dInt2Ele[i] = ele
+            nEleNum = len(lEle)
+            lScore = [0 for i in xrange(nEleNum**2)]
+            for i in xrange(nEleNum-1):
+                for j in xrange(nEleNum-1):
+                    if lEle[i] == lEle[j]:
+                        lScore[i*nEleNum+j] = args.nMatch
+                    else:
+                        lScore[i*nEleNum+j] = -args.nMismatch
+        else:
+            lEle, dEle2Int, dInt2Ele, lScore = ssw.read_matrix(args.sMatrix)
+    else:
+# load AA score matrix
+        if not args.sMatrix:
+            lEle = 'A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V   B   Z   X   *'.split()
+            for i,ele in enumerate(lEle):
+                dEle2Int[ele] = i
+                dEle2Int[ele.lower()] = i
+                dInt2Ele[i] = ele
+            nEleNum = len(lEle)
+            lScore = ssw_lib.lBlosum50
+        else:
+            # assume the format of the input score matrix is the same as that of http://www.ncbi.nlm.nih.gov/Class/FieldGuide/BLOSUM62.txt
+            lEle, dEle2Int, dInt2Ele, lScore = ssw.read_matrix(args.sMatrix)
+
+    if args.bBest and args.bProtien:
+        print >> sys.stderr, 'Reverse complement alignment is not available for protein sequences.'
+
+# translate score matrix to ctypes
+    mat = (len(lScore) * ct.c_int8) ()
+    mat[:] = lScore
+# set flag
+    nFlag = 0
+    if args.bPath:
+        nFlag = 2
+# print sam head
+    if args.bSam and args.bHeader and args.bPath:
+        print '@HD\tVN:1.4\tSO:queryname'
+        for sRId,sRSeq,_ in read(sTarget):
+            print '@SQ\tSN:{}\tLN:{}'.format(sRId, len(sRSeq))
+    elif args.bSam and not args.bPath:
+        print >> sys.stderr, 'SAM format output is only available together with option -c.\n'
+        args.bSam = False
+
+    ssw = ssw_lib.CSsw(args.sLibPath)
+# iterate query sequence
+    for sQId,sQSeq,sQQual in read(args.query):
+# build query profile
+        qNum = to_int(sQSeq, lEle, dEle2Int)
+        qProfile = ssw.ssw_init(qNum, ct.c_int32(len(sQSeq)), mat, len(lEle), 2)
+# build rc query profile
+        if args.bBest and not args.bProtien:
+            sQRcSeq = ''.join([dRc[x] for x in sQSeq[::-1]])
+            qRcNum = to_int(sQRcSeq, lEle, dEle2Int)
+            qRcProfile = ssw.ssw_init(qRcNum, ct.c_int32(len(sQSeq)), mat, len(lEle), 2)
+# set mask len
+        if len(sQSeq) > 30:
+            nMaskLen = len(sQSeq) / 2
+        else:
+            nMaskLen = 15
+
+# iter target sequence
+        for sRId,sRSeq,_ in read(args.target):
+            rNum = to_int(sRSeq, lEle, dEle2Int)
+
+# format ofres: (nScore, nScore2, nRefBeg, nRefEnd, nQryBeg, nQryEnd, nRefEnd2, nCigarLen, lCigar)
+            res = align_one(ssw, qProfile, rNum, len(sRSeq), args.nOpen, args.nExt, nFlag, nMaskLen)
+# align rc query
+            resRc = None
+            if args.bBest and not args.bProtien:
+                resRc = align_one(ssw, qRcProfile, rNum, len(sRSeq), args.nOpen, args.nExt, nFlag, nMaskLen)
+
+# build cigar and trace back path
+            strand = 0
+            if resRc == None or res[0] > resRc[0]:
+                resPrint = res
+                strand = 0
+                sCigar, sQ, sA, sR = buildPath(sQSeq, sRSeq, res[4], res[2], res[8])
+            else:
+                resPrint = resRc
+                strand = 1
+                sCigar, sQ, sA, sR = buildPath(sQRcSeq, sRSeq, resRc[4], resRc[2], resRc[8])
+
+# print results
+            if not args.bSam:
+                print 'target_name: {}\nquery_name: {}\noptimal_alignment_score: {}\t'.format(sRId, sQId, resPrint[0]),
+                if resPrint[1] > 0:
+                    print 'suboptimal_alignment_score: {}\t'.format(resPrint[1]),
+                if strand == 0:
+                    print 'strand: +\t',
+                else: 
+                    print 'strand: -\t',
+                if resPrint[2] + 1:
+                    print 'target_begin: {}\t'.format(resPrint[2] + 1),
+                print 'target_end: {}\t'.format(resPrint[3] + 1),
+                if resPrint[4] + 1:
+                    print 'query_begin: {}\t'.format(resPrint[4] + 1),
+                print 'query_end: {}\n'.format(resPrint[5] + 1)
+                if resPrint[-2] > 0:
+                    n1 = 1 + resPrint[2]
+                    n2 = min(60,len(sR)) + resPrint[2] - sR.count('-',0,60)
+                    n3 = 1 + resPrint[4]
+                    n4 = min(60,len(sQ)) + resPrint[4] - sQ.count('-',0,60)
+                    for i in range(0, len(sQ), 60):
+                        print 'Target:{:>8}\t{}\t{}'.format(n1, sR[i:i+60], n2)
+                        n1 = n2 + 1
+                        n2 = n2 + min(60,len(sR)-i-60) - sR.count('-',i+60,i+120)
+
+                        print '{: ^15}\t{}'.format('', sA[i:i+60])
+
+                        print 'Query:{:>9}\t{}\t{}\n'.format(n3, sQ[i:i+60], n4)
+                        n3 = n4 + 1
+                        n4 = n4 + min(60,len(sQ)-i-60) - sQ.count('-',i+60,i+120)
+            else:
+                print "{}\t".format(sQId),
+                if resPrint[0] == 0:
+                    print "4\t*\t0\t255\t*\t*\t0\t0\t*\t*",
+                else:
+                    mapq = int(-4.343 * math.log(1-abs(resPrint[0]-resPrint[1])/float(resPrint[0])))
+                    mapq = int(mapq + 4.99);
+                    if mapq >= 254:
+                        mapq = 254
+                    if strand == 1:
+                        print '16\t',
+                    else:
+                        print '0\t',
+                    print '{}\t{}\t{}\t'.format(sRId, resPrint[2]+1, mapq),
+                    print sCigar,
+                    print '\t*\t0\t0\t',
+                    print sQSeq[resPrint[4]:resPrint[5]+1] if strand==0 else sQRcSeq[resPrint[4]:resPrint[5]+1],
+                    print '\t',
+                    if sQQual:
+                        if strand == 0:
+                            print sQQual[resPrint[4]:resPrint[5]+1],
+                        else:
+                            print sQQual[-resPrint[4]-1:-resPrint[5]-1:-1]
+                    else:
+                        print '*',
+
+                    print '\tAS:i:{}'.format(resPrint[0]),
+                    print '\tNM:i:{}\t'.format(len(sA)-sA.count('|')),
+                    if resPrint[1] > 0:
+                        print 'ZS:i:{}'.format(resPrint[1])
+                    else:
+                        print
+
+
+        ssw.init_destroy(qProfile)
+        if args.bBest and not args.bProtien:
+            ssw.init_destroy(qRcProfile)
+
+
+if __name__ == '__main__':
+
+    parser = ap.ArgumentParser()
+    parser.add_argument('-l', '--sLibPath', default='', help='path of libssw.so')
+    parser.add_argument('-m', '--nMatch', type=int, default=2, help='a positive integer as the score for a match in genome sequence alignment. [default: 2]')
+    parser.add_argument('-x', '--nMismatch', type=int, default=2, help='a positive integer as the score for a mismatch in genome sequence alignment. [default: 2]')
+    parser.add_argument('-o', '--nOpen', type=int, default=3, help='a positive integer as the penalty for the gap opening in genome sequence alignment. [default: 3]')
+    parser.add_argument('-e', '--nExt', type=int, default=1, help='a positive integer as the penalty for the gap extension in genome sequence alignment. [default: 1]')
+    parser.add_argument('-p', '--bProtien', action='store_true', help='Do protein sequence alignment. Without this option, the ssw_test will do genome sequence alignment. [default: False]')
+    parser.add_argument('-a', '--sMatrix', default='', help='a file for either Blosum or Pam weight matrix. [default: Blosum50]')
+    parser.add_argument('-c', '--bPath', action='store_true', help='Return the alignment path. [default: False]')
+    parser.add_argument('-f', '--nThr', default=0, help='a positive integer. Only output the alignments with the Smith-Waterman score >= N.')
+    parser.add_argument('-r', '--bBest', action='store_true', help='The best alignment will be picked between the original read alignment and the reverse complement read alignment. [default: False]')
+    parser.add_argument('-s', '--bSam', action='store_true', help='Output in SAM format. [default: no header]')
+    parser.add_argument('-header', '--bHeader', action='store_true', help='If -s is used, include header in SAM output.')
+    parser.add_argument('target', help='targe file')
+    parser.add_argument('query', help='query file')
+    if len(sys.argv) == 1:
+        parser.print_help()
+        sys.exit()
+    args = parser.parse_args()
+
+    t1 = ti.default_timer()
+    main(args)
+    t2 = ti.default_timer()
+    print >> sys.stderr, 'CPU time: {} seconds'.format(t2 - t1)
diff --git a/src/ssw.c b/src/ssw.c
new file mode 100644
index 0000000..bf31ba5
--- /dev/null
+++ b/src/ssw.c
@@ -0,0 +1,884 @@
+/* The MIT License
+
+   Copyright (c) 2012-1015 Boston College.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   "Software"), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be
+   included in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+   SOFTWARE.
+*/
+
+/* Contact: Mengyao Zhao <zhangmp at bc.edu> */
+
+/*
+ *  ssw.c
+ *
+ *  Created by Mengyao Zhao on 6/22/10.
+ *  Copyright 2010 Boston College. All rights reserved.
+ *	Version 0.1.4
+ *	Last revision by Mengyao Zhao on 02/11/16.
+ *
+ */
+
+#include <emmintrin.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include "ssw.h"
+
+#ifdef __GNUC__
+#define LIKELY(x) __builtin_expect((x),1)
+#define UNLIKELY(x) __builtin_expect((x),0)
+#else
+#define LIKELY(x) (x)
+#define UNLIKELY(x) (x)
+#endif
+
+/* Convert the coordinate in the scoring matrix into the coordinate in one line of the band. */
+#define set_u(u, w, i, j) { int x=(i)-(w); x=x>0?x:0; (u)=(j)-x+1; }
+
+/* Convert the coordinate in the direction matrix into the coordinate in one line of the band. */
+#define set_d(u, w, i, j, p) { int x=(i)-(w); x=x>0?x:0; x=(j)-x; (u)=x*3+p; }
+
+/*! @function
+  @abstract  Round an integer to the next closest power-2 integer.
+  @param  x  integer to be rounded (in place)
+  @discussion x will be modified.
+ */
+#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
+
+typedef struct {
+	uint16_t score;
+	int32_t ref;	 //0-based position
+	int32_t read;    //alignment ending position on read, 0-based
+} alignment_end;
+
+typedef struct {
+	uint32_t* seq;
+	int32_t length;
+} cigar;
+
+struct _profile{
+	__m128i* profile_byte;	// 0: none
+	__m128i* profile_word;	// 0: none
+	const int8_t* read;
+	const int8_t* mat;
+	int32_t readLen;
+	int32_t n;
+	uint8_t bias;
+};
+
+/* Generate query profile rearrange query sequence & calculate the weight of match/mismatch. */
+static __m128i* qP_byte (const int8_t* read_num,
+				  const int8_t* mat,
+				  const int32_t readLen,
+				  const int32_t n,	/* the edge length of the squre matrix mat */
+				  uint8_t bias) {
+
+	int32_t segLen = (readLen + 15) / 16; /* Split the 128 bit register into 16 pieces.
+								     Each piece is 8 bit. Split the read into 16 segments.
+								     Calculat 16 segments in parallel.
+								   */
+	__m128i* vProfile = (__m128i*)malloc(n * segLen * sizeof(__m128i));
+	int8_t* t = (int8_t*)vProfile;
+	int32_t nt, i, j, segNum;
+
+	/* Generate query profile rearrange query sequence & calculate the weight of match/mismatch */
+	for (nt = 0; LIKELY(nt < n); nt ++) {
+		for (i = 0; i < segLen; i ++) {
+			j = i;
+			for (segNum = 0; LIKELY(segNum < 16) ; segNum ++) {
+				*t++ = j>= readLen ? bias : mat[nt * n + read_num[j]] + bias;
+				j += segLen;
+			}
+		}
+	}
+	return vProfile;
+}
+
+/* Striped Smith-Waterman
+   Record the highest score of each reference position.
+   Return the alignment score and ending position of the best alignment, 2nd best alignment, etc.
+   Gap begin and gap extension are different.
+   wight_match > 0, all other weights < 0.
+   The returned positions are 0-based.
+ */
+static alignment_end* sw_sse2_byte (const int8_t* ref,
+							 int8_t ref_dir,	// 0: forward ref; 1: reverse ref
+							 int32_t refLen,
+							 int32_t readLen,
+							 const uint8_t weight_gapO, /* will be used as - */
+							 const uint8_t weight_gapE, /* will be used as - */
+							 const __m128i* vProfile,
+							 uint8_t terminate,	/* the best alignment score: used to terminate
+												   the matrix calculation when locating the
+												   alignment beginning point. If this score
+												   is set to 0, it will not be used */
+	 						 uint8_t bias,  /* Shift 0 point to a positive value. */
+							 int32_t maskLen) {
+
+#define max16(m, vm) (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 8)); \
+					  (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 4)); \
+					  (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 2)); \
+					  (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 1)); \
+					  (m) = _mm_extract_epi16((vm), 0)
+
+	uint8_t max = 0;		                     /* the max alignment score */
+	int32_t end_read = readLen - 1;
+	int32_t end_ref = -1; /* 0_based best alignment ending point; Initialized as isn't aligned -1. */
+	int32_t segLen = (readLen + 15) / 16; /* number of segment */
+
+	/* array to record the largest score of each reference position */
+	uint8_t* maxColumn = (uint8_t*) calloc(refLen, 1);
+
+	/* array to record the alignment read ending position of the largest score of each reference position */
+	int32_t* end_read_column = (int32_t*) calloc(refLen, sizeof(int32_t));
+
+	/* Define 16 byte 0 vector. */
+	__m128i vZero = _mm_set1_epi32(0);
+
+	__m128i* pvHStore = (__m128i*) calloc(segLen, sizeof(__m128i));
+	__m128i* pvHLoad = (__m128i*) calloc(segLen, sizeof(__m128i));
+	__m128i* pvE = (__m128i*) calloc(segLen, sizeof(__m128i));
+	__m128i* pvHmax = (__m128i*) calloc(segLen, sizeof(__m128i));
+
+	int32_t i, j;
+	/* 16 byte insertion begin vector */
+	__m128i vGapO = _mm_set1_epi8(weight_gapO);
+
+	/* 16 byte insertion extension vector */
+	__m128i vGapE = _mm_set1_epi8(weight_gapE);
+
+	/* 16 byte bias vector */
+	__m128i vBias = _mm_set1_epi8(bias);
+
+	__m128i vMaxScore = vZero; /* Trace the highest score of the whole SW matrix. */
+	__m128i vMaxMark = vZero; /* Trace the highest score till the previous column. */
+	__m128i vTemp;
+	int32_t edge, begin = 0, end = refLen, step = 1;
+
+	/* outer loop to process the reference sequence */
+	if (ref_dir == 1) {
+		begin = refLen - 1;
+		end = -1;
+		step = -1;
+	}
+	for (i = begin; LIKELY(i != end); i += step) {
+//fprintf(stderr, "%d", ref[i]);
+		int32_t cmp;
+		__m128i e, vF = vZero, vMaxColumn = vZero; /* Initialize F value to 0.
+							   Any errors to vH values will be corrected in the Lazy_F loop.
+							 */
+//		max16(maxColumn[i], vMaxColumn);
+//		fprintf(stderr, "middle[%d]: %d\n", i, maxColumn[i]);
+
+		__m128i vH = pvHStore[segLen - 1];
+		vH = _mm_slli_si128 (vH, 1); /* Shift the 128-bit value in vH left by 1 byte. */
+		const __m128i* vP = vProfile + ref[i] * segLen; /* Right part of the vProfile */
+
+		/* Swap the 2 H buffers. */
+		__m128i* pv = pvHLoad;
+		pvHLoad = pvHStore;
+		pvHStore = pv;
+
+		/* inner loop to process the query sequence */
+		for (j = 0; LIKELY(j < segLen); ++j) {
+			vH = _mm_adds_epu8(vH, _mm_load_si128(vP + j));
+			vH = _mm_subs_epu8(vH, vBias); /* vH will be always > 0 */
+	//	max16(maxColumn[i], vH);
+	//	fprintf(stderr, "H[%d]: %d\n", i, maxColumn[i]);
+//	int8_t* t;
+//	int32_t ti;
+//for (t = (int8_t*)&vH, ti = 0; ti < 16; ++ti) fprintf(stderr, "%d\t", *t++);
+
+			/* Get max from vH, vE and vF. */
+			e = _mm_load_si128(pvE + j);
+			vH = _mm_max_epu8(vH, e);
+			vH = _mm_max_epu8(vH, vF);
+			vMaxColumn = _mm_max_epu8(vMaxColumn, vH);
+
+	//	max16(maxColumn[i], vMaxColumn);
+	//	fprintf(stderr, "middle[%d]: %d\n", i, maxColumn[i]);
+//	for (t = (int8_t*)&vMaxColumn, ti = 0; ti < 16; ++ti) fprintf(stderr, "%d\t", *t++);
+
+			/* Save vH values. */
+			_mm_store_si128(pvHStore + j, vH);
+
+			/* Update vE value. */
+			vH = _mm_subs_epu8(vH, vGapO); /* saturation arithmetic, result >= 0 */
+			e = _mm_subs_epu8(e, vGapE);
+			e = _mm_max_epu8(e, vH);
+			_mm_store_si128(pvE + j, e);
+
+			/* Update vF value. */
+			vF = _mm_subs_epu8(vF, vGapE);
+			vF = _mm_max_epu8(vF, vH);
+
+			/* Load the next vH. */
+			vH = _mm_load_si128(pvHLoad + j);
+		}
+
+		/* Lazy_F loop: has been revised to disallow adjecent insertion and then deletion, so don't update E(i, j), learn from SWPS3 */
+        /* reset pointers to the start of the saved data */
+        j = 0;
+        vH = _mm_load_si128 (pvHStore + j);
+
+        /*  the computed vF value is for the given column.  since */
+        /*  we are at the end, we need to shift the vF value over */
+        /*  to the next column. */
+        vF = _mm_slli_si128 (vF, 1);
+        vTemp = _mm_subs_epu8 (vH, vGapO);
+		vTemp = _mm_subs_epu8 (vF, vTemp);
+		vTemp = _mm_cmpeq_epi8 (vTemp, vZero);
+		cmp  = _mm_movemask_epi8 (vTemp);
+
+        while (cmp != 0xffff)
+        {
+            vH = _mm_max_epu8 (vH, vF);
+			vMaxColumn = _mm_max_epu8(vMaxColumn, vH);
+            _mm_store_si128 (pvHStore + j, vH);
+            vF = _mm_subs_epu8 (vF, vGapE);
+            j++;
+            if (j >= segLen)
+            {
+                j = 0;
+                vF = _mm_slli_si128 (vF, 1);
+            }
+            vH = _mm_load_si128 (pvHStore + j);
+
+            vTemp = _mm_subs_epu8 (vH, vGapO);
+            vTemp = _mm_subs_epu8 (vF, vTemp);
+            vTemp = _mm_cmpeq_epi8 (vTemp, vZero);
+            cmp  = _mm_movemask_epi8 (vTemp);
+        }
+
+		vMaxScore = _mm_max_epu8(vMaxScore, vMaxColumn);
+		vTemp = _mm_cmpeq_epi8(vMaxMark, vMaxScore);
+		cmp = _mm_movemask_epi8(vTemp);
+		if (cmp != 0xffff) {
+			uint8_t temp;
+			vMaxMark = vMaxScore;
+			max16(temp, vMaxScore);
+			vMaxScore = vMaxMark;
+
+			if (LIKELY(temp > max)) {
+				max = temp;
+				if (max + bias >= 255) break;	//overflow
+				end_ref = i;
+
+				/* Store the column with the highest alignment score in order to trace the alignment ending position on read. */
+				for (j = 0; LIKELY(j < segLen); ++j) pvHmax[j] = pvHStore[j];
+			}
+		}
+
+		/* Record the max score of current column. */
+		max16(maxColumn[i], vMaxColumn);
+	//	fprintf(stderr, "maxColumn[%d]: %d\n", i, maxColumn[i]);
+		if (maxColumn[i] == terminate) break;
+	}
+
+	/* Trace the alignment ending position on read. */
+	uint8_t *t = (uint8_t*)pvHmax;
+	int32_t column_len = segLen * 16;
+	for (i = 0; LIKELY(i < column_len); ++i, ++t) {
+		int32_t temp;
+		if (*t == max) {
+			temp = i / 16 + i % 16 * segLen;
+			if (temp < end_read) end_read = temp;
+		}
+	}
+
+	free(pvHmax);
+	free(pvE);
+	free(pvHLoad);
+	free(pvHStore);
+
+	/* Find the most possible 2nd best alignment. */
+	alignment_end* bests = (alignment_end*) calloc(2, sizeof(alignment_end));
+	bests[0].score = max + bias >= 255 ? 255 : max;
+	bests[0].ref = end_ref;
+	bests[0].read = end_read;
+
+	bests[1].score = 0;
+	bests[1].ref = 0;
+	bests[1].read = 0;
+
+	edge = (end_ref - maskLen) > 0 ? (end_ref - maskLen) : 0;
+	for (i = 0; i < edge; i ++) {
+//			fprintf (stderr, "maxColumn[%d]: %d\n", i, maxColumn[i]);
+		if (maxColumn[i] > bests[1].score) {
+			bests[1].score = maxColumn[i];
+			bests[1].ref = i;
+		}
+	}
+	edge = (end_ref + maskLen) > refLen ? refLen : (end_ref + maskLen);
+	for (i = edge + 1; i < refLen; i ++) {
+//			fprintf (stderr, "refLen: %d\tmaxColumn[%d]: %d\n", refLen, i, maxColumn[i]);
+		if (maxColumn[i] > bests[1].score) {
+			bests[1].score = maxColumn[i];
+			bests[1].ref = i;
+		}
+	}
+
+	free(maxColumn);
+	free(end_read_column);
+	return bests;
+}
+
+static __m128i* qP_word (const int8_t* read_num,
+				  const int8_t* mat,
+				  const int32_t readLen,
+				  const int32_t n) {
+
+	int32_t segLen = (readLen + 7) / 8;
+	__m128i* vProfile = (__m128i*)malloc(n * segLen * sizeof(__m128i));
+	int16_t* t = (int16_t*)vProfile;
+	int32_t nt, i, j;
+	int32_t segNum;
+
+	/* Generate query profile rearrange query sequence & calculate the weight of match/mismatch */
+	for (nt = 0; LIKELY(nt < n); nt ++) {
+		for (i = 0; i < segLen; i ++) {
+			j = i;
+			for (segNum = 0; LIKELY(segNum < 8) ; segNum ++) {
+				*t++ = j>= readLen ? 0 : mat[nt * n + read_num[j]];
+				j += segLen;
+			}
+		}
+	}
+	return vProfile;
+}
+
+static alignment_end* sw_sse2_word (const int8_t* ref,
+							 int8_t ref_dir,	// 0: forward ref; 1: reverse ref
+							 int32_t refLen,
+							 int32_t readLen,
+							 const uint8_t weight_gapO, /* will be used as - */
+							 const uint8_t weight_gapE, /* will be used as - */
+							 const __m128i* vProfile,
+							 uint16_t terminate,
+							 int32_t maskLen) {
+
+#define max8(m, vm) (vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 8)); \
+					(vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 4)); \
+					(vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 2)); \
+					(m) = _mm_extract_epi16((vm), 0)
+
+	uint16_t max = 0;		                     /* the max alignment score */
+	int32_t end_read = readLen - 1;
+	int32_t end_ref = 0; /* 1_based best alignment ending point; Initialized as isn't aligned - 0. */
+	int32_t segLen = (readLen + 7) / 8; /* number of segment */
+
+	/* array to record the largest score of each reference position */
+	uint16_t* maxColumn = (uint16_t*) calloc(refLen, 2);
+
+	/* array to record the alignment read ending position of the largest score of each reference position */
+	int32_t* end_read_column = (int32_t*) calloc(refLen, sizeof(int32_t));
+
+	/* Define 16 byte 0 vector. */
+	__m128i vZero = _mm_set1_epi32(0);
+
+	__m128i* pvHStore = (__m128i*) calloc(segLen, sizeof(__m128i));
+	__m128i* pvHLoad = (__m128i*) calloc(segLen, sizeof(__m128i));
+	__m128i* pvE = (__m128i*) calloc(segLen, sizeof(__m128i));
+	__m128i* pvHmax = (__m128i*) calloc(segLen, sizeof(__m128i));
+
+	int32_t i, j, k;
+	/* 16 byte insertion begin vector */
+	__m128i vGapO = _mm_set1_epi16(weight_gapO);
+
+	/* 16 byte insertion extension vector */
+	__m128i vGapE = _mm_set1_epi16(weight_gapE);
+
+	__m128i vMaxScore = vZero; /* Trace the highest score of the whole SW matrix. */
+	__m128i vMaxMark = vZero; /* Trace the highest score till the previous column. */
+	__m128i vTemp;
+	int32_t edge, begin = 0, end = refLen, step = 1;
+
+	/* outer loop to process the reference sequence */
+	if (ref_dir == 1) {
+		begin = refLen - 1;
+		end = -1;
+		step = -1;
+	}
+	for (i = begin; LIKELY(i != end); i += step) {
+		int32_t cmp;
+		__m128i e, vF = vZero; /* Initialize F value to 0.
+							   Any errors to vH values will be corrected in the Lazy_F loop.
+							 */
+		__m128i vH = pvHStore[segLen - 1];
+		vH = _mm_slli_si128 (vH, 2); /* Shift the 128-bit value in vH left by 2 byte. */
+
+		/* Swap the 2 H buffers. */
+		__m128i* pv = pvHLoad;
+
+		__m128i vMaxColumn = vZero; /* vMaxColumn is used to record the max values of column i. */
+
+		const __m128i* vP = vProfile + ref[i] * segLen; /* Right part of the vProfile */
+		pvHLoad = pvHStore;
+		pvHStore = pv;
+
+		/* inner loop to process the query sequence */
+		for (j = 0; LIKELY(j < segLen); j ++) {
+			vH = _mm_adds_epi16(vH, _mm_load_si128(vP + j));
+
+			/* Get max from vH, vE and vF. */
+			e = _mm_load_si128(pvE + j);
+			vH = _mm_max_epi16(vH, e);
+			vH = _mm_max_epi16(vH, vF);
+			vMaxColumn = _mm_max_epi16(vMaxColumn, vH);
+
+			/* Save vH values. */
+			_mm_store_si128(pvHStore + j, vH);
+
+			/* Update vE value. */
+			vH = _mm_subs_epu16(vH, vGapO); /* saturation arithmetic, result >= 0 */
+			e = _mm_subs_epu16(e, vGapE);
+			e = _mm_max_epi16(e, vH);
+			_mm_store_si128(pvE + j, e);
+
+			/* Update vF value. */
+			vF = _mm_subs_epu16(vF, vGapE);
+			vF = _mm_max_epi16(vF, vH);
+
+			/* Load the next vH. */
+			vH = _mm_load_si128(pvHLoad + j);
+		}
+
+		/* Lazy_F loop: has been revised to disallow adjecent insertion and then deletion, so don't update E(i, j), learn from SWPS3 */
+		for (k = 0; LIKELY(k < 8); ++k) {
+			vF = _mm_slli_si128 (vF, 2);
+			for (j = 0; LIKELY(j < segLen); ++j) {
+				vH = _mm_load_si128(pvHStore + j);
+				vH = _mm_max_epi16(vH, vF);
+				vMaxColumn = _mm_max_epi16(vMaxColumn, vH); //newly added line
+				_mm_store_si128(pvHStore + j, vH);
+				vH = _mm_subs_epu16(vH, vGapO);
+				vF = _mm_subs_epu16(vF, vGapE);
+				if (UNLIKELY(! _mm_movemask_epi8(_mm_cmpgt_epi16(vF, vH)))) goto end;
+			}
+		}
+
+end:
+		vMaxScore = _mm_max_epi16(vMaxScore, vMaxColumn);
+		vTemp = _mm_cmpeq_epi16(vMaxMark, vMaxScore);
+		cmp = _mm_movemask_epi8(vTemp);
+		if (cmp != 0xffff) {
+			uint16_t temp;
+			vMaxMark = vMaxScore;
+			max8(temp, vMaxScore);
+			vMaxScore = vMaxMark;
+
+			if (LIKELY(temp > max)) {
+				max = temp;
+				end_ref = i;
+				for (j = 0; LIKELY(j < segLen); ++j) pvHmax[j] = pvHStore[j];
+			}
+		}
+
+		/* Record the max score of current column. */
+		max8(maxColumn[i], vMaxColumn);
+		if (maxColumn[i] == terminate) break;
+	}
+
+	/* Trace the alignment ending position on read. */
+	uint16_t *t = (uint16_t*)pvHmax;
+	int32_t column_len = segLen * 8;
+	for (i = 0; LIKELY(i < column_len); ++i, ++t) {
+		int32_t temp;
+		if (*t == max) {
+			temp = i / 8 + i % 8 * segLen;
+			if (temp < end_read) end_read = temp;
+		}
+	}
+
+	free(pvHmax);
+	free(pvE);
+	free(pvHLoad);
+	free(pvHStore);
+
+	/* Find the most possible 2nd best alignment. */
+	alignment_end* bests = (alignment_end*) calloc(2, sizeof(alignment_end));
+	bests[0].score = max;
+	bests[0].ref = end_ref;
+	bests[0].read = end_read;
+
+	bests[1].score = 0;
+	bests[1].ref = 0;
+	bests[1].read = 0;
+
+	edge = (end_ref - maskLen) > 0 ? (end_ref - maskLen) : 0;
+	for (i = 0; i < edge; i ++) {
+		if (maxColumn[i] > bests[1].score) {
+			bests[1].score = maxColumn[i];
+			bests[1].ref = i;
+		}
+	}
+	edge = (end_ref + maskLen) > refLen ? refLen : (end_ref + maskLen);
+	for (i = edge; i < refLen; i ++) {
+		if (maxColumn[i] > bests[1].score) {
+			bests[1].score = maxColumn[i];
+			bests[1].ref = i;
+		}
+	}
+
+	free(maxColumn);
+	free(end_read_column);
+	return bests;
+}
+
+static cigar* banded_sw (const int8_t* ref,
+				 const int8_t* read,
+				 int32_t refLen,
+				 int32_t readLen,
+				 int32_t score,
+				 const uint32_t weight_gapO,  /* will be used as - */
+				 const uint32_t weight_gapE,  /* will be used as - */
+				 int32_t band_width,
+				 const int8_t* mat,	/* pointer to the weight matrix */
+				 int32_t n) {
+
+	uint32_t *c = (uint32_t*)malloc(16 * sizeof(uint32_t)), *c1;
+	int32_t i, j, e, f, temp1, temp2, s = 16, s1 = 8, l, max = 0;
+	int64_t s2 = 1024;
+	char op, prev_op;
+	int32_t width, width_d, *h_b, *e_b, *h_c;
+	int8_t *direction, *direction_line;
+	cigar* result = (cigar*)malloc(sizeof(cigar));
+	h_b = (int32_t*)malloc(s1 * sizeof(int32_t));
+	e_b = (int32_t*)malloc(s1 * sizeof(int32_t));
+	h_c = (int32_t*)malloc(s1 * sizeof(int32_t));
+	direction = (int8_t*)malloc(s2 * sizeof(int8_t));
+
+	do {
+		width = band_width * 2 + 3, width_d = band_width * 2 + 1;
+		while (width >= s1) {
+			++s1;
+			kroundup32(s1);
+			h_b = (int32_t*)realloc(h_b, s1 * sizeof(int32_t));
+			e_b = (int32_t*)realloc(e_b, s1 * sizeof(int32_t));
+			h_c = (int32_t*)realloc(h_c, s1 * sizeof(int32_t));
+		}
+		while (width_d * readLen * 3 >= s2) {
+			++s2;
+			kroundup32(s2);
+			if (s2 < 0) {
+				fprintf(stderr, "Alignment score and position are not consensus.\n");
+				exit(1);
+			}
+			direction = (int8_t*)realloc(direction, s2 * sizeof(int8_t));
+		}
+		direction_line = direction;
+		for (j = 1; LIKELY(j < width - 1); j ++) h_b[j] = 0;
+		for (i = 0; LIKELY(i < readLen); i ++) {
+			int32_t beg = 0, end = refLen - 1, u = 0, edge;
+			j = i - band_width;	beg = beg > j ? beg : j; // band start
+			j = i + band_width; end = end < j ? end : j; // band end
+			edge = end + 1 < width - 1 ? end + 1 : width - 1;
+			f = h_b[0] = e_b[0] = h_b[edge] = e_b[edge] = h_c[0] = 0;
+			direction_line = direction + width_d * i * 3;
+
+			for (j = beg; LIKELY(j <= end); j ++) {
+				int32_t b, e1, f1, d, de, df, dh;
+				set_u(u, band_width, i, j);	set_u(e, band_width, i - 1, j);
+				set_u(b, band_width, i, j - 1); set_u(d, band_width, i - 1, j - 1);
+				set_d(de, band_width, i, j, 0);
+				set_d(df, band_width, i, j, 1);
+				set_d(dh, band_width, i, j, 2);
+
+				temp1 = i == 0 ? -weight_gapO : h_b[e] - weight_gapO;
+				temp2 = i == 0 ? -weight_gapE : e_b[e] - weight_gapE;
+				e_b[u] = temp1 > temp2 ? temp1 : temp2;
+				//fprintf(stderr, "de: %d\twidth_d: %d\treadLen: %d\ts2:%lu\n", de, width_d, readLen, s2);
+				direction_line[de] = temp1 > temp2 ? 3 : 2;
+
+				temp1 = h_c[b] - weight_gapO;
+				temp2 = f - weight_gapE;
+				f = temp1 > temp2 ? temp1 : temp2;
+				direction_line[df] = temp1 > temp2 ? 5 : 4;
+
+				e1 = e_b[u] > 0 ? e_b[u] : 0;
+				f1 = f > 0 ? f : 0;
+				temp1 = e1 > f1 ? e1 : f1;
+				temp2 = h_b[d] + mat[ref[j] * n + read[i]];
+				h_c[u] = temp1 > temp2 ? temp1 : temp2;
+
+				if (h_c[u] > max) max = h_c[u];
+
+				if (temp1 <= temp2) direction_line[dh] = 1;
+				else direction_line[dh] = e1 > f1 ? direction_line[de] : direction_line[df];
+			}
+			for (j = 1; j <= u; j ++) h_b[j] = h_c[j];
+		}
+		band_width *= 2;
+	} while (LIKELY(max < score));
+	band_width /= 2;
+
+	// trace back
+	i = readLen - 1;
+	j = refLen - 1;
+	e = 0;	// Count the number of M, D or I.
+	l = 0;	// record length of current cigar
+	op = prev_op = 'M';
+	temp2 = 2;	// h
+	while (LIKELY(i > 0)) {
+		set_d(temp1, band_width, i, j, temp2);
+		switch (direction_line[temp1]) {
+			case 1:
+				--i;
+				--j;
+				temp2 = 2;
+				direction_line -= width_d * 3;
+				op = 'M';
+				break;
+			case 2:
+			 	--i;
+				temp2 = 0;	// e
+				direction_line -= width_d * 3;
+				op = 'I';
+				break;
+			case 3:
+				--i;
+				temp2 = 2;
+				direction_line -= width_d * 3;
+				op = 'I';
+				break;
+			case 4:
+				--j;
+				temp2 = 1;
+				op = 'D';
+				break;
+			case 5:
+				--j;
+				temp2 = 2;
+				op = 'D';
+				break;
+			default:
+				fprintf(stderr, "Trace back error: %d.\n", direction_line[temp1 - 1]);
+				free(direction);
+				free(h_c);
+				free(e_b);
+				free(h_b);
+				free(c);
+				free(result);
+				return 0;
+		}
+		if (op == prev_op) ++e;
+		else {
+			++l;
+			while (l >= s) {
+				++s;
+				kroundup32(s);
+				c = (uint32_t*)realloc(c, s * sizeof(uint32_t));
+			}
+			c[l - 1] = to_cigar_int(e, prev_op);
+			prev_op = op;
+			e = 1;
+		}
+	}
+	if (op == 'M') {
+		++l;
+		while (l >= s) {
+			++s;
+			kroundup32(s);
+			c = (uint32_t*)realloc(c, s * sizeof(uint32_t));
+		}
+		c[l - 1] = to_cigar_int(e + 1, op);
+	}else {
+		l += 2;
+		while (l >= s) {
+			++s;
+			kroundup32(s);
+			c = (uint32_t*)realloc(c, s * sizeof(uint32_t));
+		}
+		c[l - 2] = to_cigar_int(e, op);
+		c[l - 1] = to_cigar_int(1, 'M');
+	}
+
+	// reverse cigar
+	c1 = (uint32_t*)malloc(l * sizeof(uint32_t));
+	s = 0;
+	e = l - 1;
+	while (LIKELY(s <= e)) {
+		c1[s] = c[e];
+		c1[e] = c[s];
+		++ s;
+		-- e;
+	}
+	result->seq = c1;
+	result->length = l;
+
+	free(direction);
+	free(h_c);
+	free(e_b);
+	free(h_b);
+	free(c);
+	return result;
+}
+
+static int8_t* seq_reverse(const int8_t* seq, int32_t end)	/* end is 0-based alignment ending position */
+{
+	int8_t* reverse = (int8_t*)calloc(end + 1, sizeof(int8_t));
+	int32_t start = 0;
+	while (LIKELY(start <= end)) {
+		reverse[start] = seq[end];
+		reverse[end] = seq[start];
+		++ start;
+		-- end;
+	}
+	return reverse;
+}
+
+s_profile* ssw_init (const int8_t* read, const int32_t readLen, const int8_t* mat, const int32_t n, const int8_t score_size) {
+	s_profile* p = (s_profile*)calloc(1, sizeof(struct _profile));
+	p->profile_byte = 0;
+	p->profile_word = 0;
+	p->bias = 0;
+
+	if (score_size == 0 || score_size == 2) {
+		/* Find the bias to use in the substitution matrix */
+		int32_t bias = 0, i;
+		for (i = 0; i < n*n; i++) if (mat[i] < bias) bias = mat[i];
+		bias = abs(bias);
+
+		p->bias = bias;
+		p->profile_byte = qP_byte (read, mat, readLen, n, bias);
+	}
+	if (score_size == 1 || score_size == 2) p->profile_word = qP_word (read, mat, readLen, n);
+	p->read = read;
+	p->mat = mat;
+	p->readLen = readLen;
+	p->n = n;
+	return p;
+}
+
+void init_destroy (s_profile* p) {
+	free(p->profile_byte);
+	free(p->profile_word);
+	free(p);
+}
+
+s_align* ssw_align (const s_profile* prof,
+					const int8_t* ref,
+				  	int32_t refLen,
+				  	const uint8_t weight_gapO,
+				  	const uint8_t weight_gapE,
+					const uint8_t flag,	//  (from high to low) bit 5: return the best alignment beginning position; 6: if (ref_end1 - ref_begin1 <= filterd) && (read_end1 - read_begin1 <= filterd), return cigar; 7: if max score >= filters, return cigar; 8: always return cigar; if 6 & 7 are both setted, only return cigar when both filter fulfilled
+					const uint16_t filters,
+					const int32_t filterd,
+					const int32_t maskLen) {
+
+	alignment_end* bests = 0, *bests_reverse = 0;
+	__m128i* vP = 0;
+	int32_t word = 0, band_width = 0, readLen = prof->readLen;
+	int8_t* read_reverse = 0;
+	cigar* path;
+	s_align* r = (s_align*)calloc(1, sizeof(s_align));
+	r->ref_begin1 = -1;
+	r->read_begin1 = -1;
+	r->cigar = 0;
+	r->cigarLen = 0;
+	if (maskLen < 15) {
+		fprintf(stderr, "When maskLen < 15, the function ssw_align doesn't return 2nd best alignment information.\n");
+	}
+
+	// Find the alignment scores and ending positions
+	if (prof->profile_byte) {
+		bests = sw_sse2_byte(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_byte, -1, prof->bias, maskLen);
+		if (prof->profile_word && bests[0].score == 255) {
+			free(bests);
+			bests = sw_sse2_word(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_word, -1, maskLen);
+			word = 1;
+		} else if (bests[0].score == 255) {
+			fprintf(stderr, "Please set 2 to the score_size parameter of the function ssw_init, otherwise the alignment results will be incorrect.\n");
+			free(r);
+			return NULL;
+		}
+	}else if (prof->profile_word) {
+		bests = sw_sse2_word(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_word, -1, maskLen);
+		word = 1;
+	}else {
+		fprintf(stderr, "Please call the function ssw_init before ssw_align.\n");
+		free(r);
+		return NULL;
+	}
+	r->score1 = bests[0].score;
+	r->ref_end1 = bests[0].ref;
+//fprintf(stderr, "0based ref_end: %d\n", r->ref_end1);
+	r->read_end1 = bests[0].read;
+	if (maskLen >= 15) {
+		r->score2 = bests[1].score;
+		r->ref_end2 = bests[1].ref;
+	} else {
+		r->score2 = 0;
+		r->ref_end2 = -1;
+	}
+	free(bests);
+	if (flag == 0 || (flag == 2 && r->score1 < filters)) goto end;
+
+	// Find the beginning position of the best alignment.
+	read_reverse = seq_reverse(prof->read, r->read_end1);
+	if (word == 0) {
+		vP = qP_byte(read_reverse, prof->mat, r->read_end1 + 1, prof->n, prof->bias);
+		bests_reverse = sw_sse2_byte(ref, 1, r->ref_end1 + 1, r->read_end1 + 1, weight_gapO, weight_gapE, vP, r->score1, prof->bias, maskLen);
+	} else {
+		vP = qP_word(read_reverse, prof->mat, r->read_end1 + 1, prof->n);
+		bests_reverse = sw_sse2_word(ref, 1, r->ref_end1 + 1, r->read_end1 + 1, weight_gapO, weight_gapE, vP, r->score1, maskLen);
+	}
+	free(vP);
+	free(read_reverse);
+	r->ref_begin1 = bests_reverse[0].ref;
+	r->read_begin1 = r->read_end1 - bests_reverse[0].read;
+	free(bests_reverse);
+	if ((7&flag) == 0 || ((2&flag) != 0 && r->score1 < filters) || ((4&flag) != 0 && (r->ref_end1 - r->ref_begin1 > filterd || r->read_end1 - r->read_begin1 > filterd))) goto end;
+
+	// Generate cigar.
+	refLen = r->ref_end1 - r->ref_begin1 + 1;
+	readLen = r->read_end1 - r->read_begin1 + 1;
+	band_width = abs(refLen - readLen) + 1;
+	path = banded_sw(ref + r->ref_begin1, prof->read + r->read_begin1, refLen, readLen, r->score1, weight_gapO, weight_gapE, band_width, prof->mat, prof->n);
+	if (path == 0) {
+		free(r);
+		r = NULL;
+	}
+	else {
+		r->cigar = path->seq;
+		r->cigarLen = path->length;
+		free(path);
+	}
+
+end:
+	return r;
+}
+
+void align_destroy (s_align* a) {
+	free(a->cigar);
+	free(a);
+}
+/*
+inline char cigar_int_to_op(uint32_t cigar_int) {
+	return UNLIKELY((cigar_int & 0xfU) > 8) ? 'M': MAPSTR[cigar_int & 0xfU];
+}
+
+
+inline uint32_t cigar_int_to_len (uint32_t cigar_int)
+{
+	return cigar_int >> BAM_CIGAR_SHIFT;
+}*/
diff --git a/src/ssw.h b/src/ssw.h
new file mode 100644
index 0000000..685ecf3
--- /dev/null
+++ b/src/ssw.h
@@ -0,0 +1,188 @@
+/*
+ *  ssw.h
+ *
+ *  Created by Mengyao Zhao on 6/22/10.
+ *  Copyright 2010 Boston College. All rights reserved.
+ *	Version 0.1.4
+ *	Last revision by Mengyao Zhao on 02/11/16.
+ *
+ */
+
+#ifndef SSW_H
+#define SSW_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <emmintrin.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif	// __cplusplus
+
+#define MAPSTR "MIDNSHP=X"
+#ifndef BAM_CIGAR_SHIFT
+#define BAM_CIGAR_SHIFT 4
+#endif
+
+
+/*!	@typedef	structure of the query profile	*/
+struct _profile;
+typedef struct _profile s_profile;
+
+/*!	@typedef	structure of the alignment result
+	@field	score1	the best alignment score
+	@field	score2	sub-optimal alignment score
+	@field	ref_begin1	0-based best alignment beginning position on reference;	ref_begin1 = -1 when the best alignment beginning
+						position is not available
+	@field	ref_end1	0-based best alignment ending position on reference
+	@field	read_begin1	0-based best alignment beginning position on read; read_begin1 = -1 when the best alignment beginning
+						position is not available
+	@field	read_end1	0-based best alignment ending position on read
+	@field	read_end2	0-based sub-optimal alignment ending position on read
+	@field	cigar	best alignment cigar; stored the same as that in BAM format, high 28 bits: length, low 4 bits: M/I/D (0/1/2);
+					cigar = 0 when the best alignment path is not available
+	@field	cigarLen	length of the cigar string; cigarLen = 0 when the best alignment path is not available
+*/
+typedef struct {
+	uint16_t score1;
+	uint16_t score2;
+	int32_t ref_begin1;
+	int32_t ref_end1;
+	int32_t	read_begin1;
+	int32_t read_end1;
+	int32_t ref_end2;
+	uint32_t* cigar;
+	int32_t cigarLen;
+} s_align;
+
+/*!	@function	Create the query profile using the query sequence.
+	@param	read	pointer to the query sequence; the query sequence needs to be numbers
+	@param	readLen	length of the query sequence
+	@param	mat	pointer to the substitution matrix; mat needs to be corresponding to the read sequence
+	@param	n	the square root of the number of elements in mat (mat has n*n elements)
+	@param	score_size	estimated Smith-Waterman score; if your estimated best alignment score is surely < 255 please set 0; if
+						your estimated best alignment score >= 255, please set 1; if you don't know, please set 2
+	@return	pointer to the query profile structure
+	@note	example for parameter read and mat:
+			If the query sequence is: ACGTATC, the sequence that read points to can be: 1234142
+			Then if the penalty for match is 2 and for mismatch is -2, the substitution matrix of parameter mat will be:
+			//A  C  G  T
+			  2 -2 -2 -2 //A
+			 -2  2 -2 -2 //C
+			 -2 -2  2 -2 //G
+			 -2 -2 -2  2 //T
+			mat is the pointer to the array {2, -2, -2, -2, -2, 2, -2, -2, -2, -2, 2, -2, -2, -2, -2, 2}
+*/
+s_profile* ssw_init (const int8_t* read, const int32_t readLen, const int8_t* mat, const int32_t n, const int8_t score_size);
+
+/*!	@function	Release the memory allocated by function ssw_init.
+	@param	p	pointer to the query profile structure
+*/
+void init_destroy (s_profile* p);
+
+// @function	ssw alignment.
+/*!	@function	Do Striped Smith-Waterman alignment.
+	@param	prof	pointer to the query profile structure
+	@param	ref	pointer to the target sequence; the target sequence needs to be numbers and corresponding to the mat parameter of
+				function ssw_init
+	@param	refLen	length of the target sequence
+	@param	weight_gapO	the absolute value of gap open penalty
+	@param	weight_gapE	the absolute value of gap extension penalty
+	@param	flag	bitwise FLAG; (from high to low) bit 5: when setted as 1, function ssw_align will return the best alignment
+					beginning position; bit 6: when setted as 1, if (ref_end1 - ref_begin1 < filterd && read_end1 - read_begin1
+					< filterd), (whatever bit 5 is setted) the function will return the best alignment beginning position and
+					cigar; bit 7: when setted as 1, if the best alignment score >= filters, (whatever bit 5 is setted) the function
+  					will return the best alignment beginning position and cigar; bit 8: when setted as 1, (whatever bit 5, 6 or 7 is
+ 					setted) the function will always return the best alignment beginning position and cigar. When flag == 0, only
+					the optimal and sub-optimal scores and the optimal alignment ending position will be returned.
+	@param	filters	score filter: when bit 7 of flag is setted as 1 and bit 8 is setted as 0, filters will be used (Please check the
+ 					decription of the flag parameter for detailed usage.)
+	@param	filterd	distance filter: when bit 6 of flag is setted as 1 and bit 8 is setted as 0, filterd will be used (Please check
+					the decription of the flag parameter for detailed usage.)
+	@param	maskLen	The distance between the optimal and suboptimal alignment ending position >= maskLen. We suggest to use
+					readLen/2, if you don't have special concerns. Note: maskLen has to be >= 15, otherwise this function will NOT
+					return the suboptimal alignment information. Detailed description of maskLen: After locating the optimal
+					alignment ending position, the suboptimal alignment score can be heuristically found by checking the second
+					largest score in the array that contains the maximal score of each column of the SW matrix. In order to avoid
+					picking the scores that belong to the alignments sharing the partial best alignment, SSW C library masks the
+					reference loci nearby (mask length = maskLen) the best alignment ending position and locates the second largest
+					score from the unmasked elements.
+	@return	pointer to the alignment result structure
+	@note	Whatever the parameter flag is setted, this function will at least return the optimal and sub-optimal alignment score,
+			and the optimal alignment ending positions on target and query sequences. If both bit 6 and 7 of the flag are setted
+			while bit 8 is not, the function will return cigar only when both criteria are fulfilled. All returned positions are
+			0-based coordinate.
+*/
+s_align* ssw_align (const s_profile* prof,
+					const int8_t* ref,
+					int32_t refLen,
+					const uint8_t weight_gapO,
+					const uint8_t weight_gapE,
+					const uint8_t flag,
+					const uint16_t filters,
+					const int32_t filterd,
+					const int32_t maskLen);
+
+/*!	@function	Release the memory allocated by function ssw_align.
+	@param	a	pointer to the alignment result structure
+*/
+void align_destroy (s_align* a);
+
+/*!	@function		Produce CIGAR 32-bit unsigned integer from CIGAR operation and CIGAR length
+	@param	length		length of CIGAR
+	@param	op_letter	CIGAR operation character ('M', 'I', etc)
+	@return			32-bit unsigned integer, representing encoded CIGAR operation and length
+*/
+static inline uint32_t to_cigar_int (uint32_t length, char op_letter)
+{
+	switch (op_letter) {
+		case 'M': /* alignment match (can be a sequence match or mismatch */
+		default:
+			return length << BAM_CIGAR_SHIFT;
+		case 'S': /* soft clipping (clipped sequences present in SEQ) */
+			return (length << BAM_CIGAR_SHIFT) | (4u);
+		case 'D': /* deletion from the reference */
+			return (length << BAM_CIGAR_SHIFT) | (2u);
+		case 'I': /* insertion to the reference */
+			return (length << BAM_CIGAR_SHIFT) | (1u);
+		case 'H': /* hard clipping (clipped sequences NOT present in SEQ) */
+			return (length << BAM_CIGAR_SHIFT) | (5u);
+		case 'N': /* skipped region from the reference */
+			return (length << BAM_CIGAR_SHIFT) | (3u);
+		case 'P': /* padding (silent deletion from padded reference) */
+			return (length << BAM_CIGAR_SHIFT) | (6u);
+		case '=': /* sequence match */
+			return (length << BAM_CIGAR_SHIFT) | (7u);
+		case 'X': /* sequence mismatch */
+			return (length << BAM_CIGAR_SHIFT) | (8u);
+	}
+	return (uint32_t)-1; // This never happens
+}
+
+
+/*!	@function		Extract CIGAR operation character from CIGAR 32-bit unsigned integer
+	@param	cigar_int	32-bit unsigned integer, representing encoded CIGAR operation and length
+	@return			CIGAR operation character ('M', 'I', etc)
+*/
+//char cigar_int_to_op (uint32_t cigar_int);
+static inline char cigar_int_to_op(uint32_t cigar_int) 
+{
+	return (cigar_int & 0xfU) > 8 ? 'M': MAPSTR[cigar_int & 0xfU];
+}
+
+
+/*!	@function		Extract length of a CIGAR operation from CIGAR 32-bit unsigned integer
+	@param	cigar_int	32-bit unsigned integer, representing encoded CIGAR operation and length
+	@return			length of CIGAR operation
+*/
+//uint32_t cigar_int_to_len (uint32_t cigar_int);
+static inline uint32_t cigar_int_to_len (uint32_t cigar_int)
+{
+	return cigar_int >> BAM_CIGAR_SHIFT;
+}
+#ifdef __cplusplus
+}
+#endif	// __cplusplus
+
+#endif	// SSW_H
diff --git a/src/ssw/Aligner.java b/src/ssw/Aligner.java
new file mode 100644
index 0000000..8490532
--- /dev/null
+++ b/src/ssw/Aligner.java
@@ -0,0 +1,161 @@
+package ssw;
+
+
+/**
+ * JNI wrapper for Striped Smith-Waterman alignment
+ * 
+ * @author Daniel Cameron
+ *
+ */
+public class Aligner {
+	/**
+	 * your estimated best alignment score is surely < 255
+	 */
+	public static final int MAX_SCORE_LESS_THAN_255 = 0;
+	/**
+	 * if your estimated best alignment score >= 255
+	 */
+	public static final int MAX_SCORE_GREATER_THAN_255 = 1;
+	public static final int MAX_SCORE_UNSURE = 2;
+	//public static final int FLAG_UNUSED = 0x80;
+	//public static final int FLAG_UNUSED = 0x40;
+	//public static final int FLAG_UNUSED = 0x20;
+	//public static final int FLAG_UNUSED = 0x10;
+	/**
+	 *  bit 5: when setted as 1, the function will return the best alignment beginning position
+	 */
+	public static final int FLAG_INCLUDE_BEST_ALIGNMENT_POSITION = 0x08;
+	/**
+	 * bit 6: when setted as 1, if (ref_end1 - ref_begin1 < filterd && read_end1 - read_begin1 < filterd), (whatever bit 5 is setted) the function will return the best alignment beginning position and cigar;  
+	 */
+	public static final int FLAG_INCLUDE_BEST_ALIGNMENT_POSITION_AND_CIGAR_IF_MORE_THAN_FILTER_DISTANCE_POSITIONS_ALIGNED = 0x04;
+	/**
+	 * bit 7: when setted as 1, if the best alignment score >= filters, (whatever bit 5 is setted) the function will return the best alignment beginning position and cigar;
+	 */
+	public static final int FLAG_INCLUDE_BEST_ALIGNMENT_POSITION_AND_CIGAR_IF_SCORE_GREATER_THAN_FILTER_SORE = 0x02;
+	/**
+	 * bit 8: when setted as 1, (whatever bit 5, 6 or 7 is setted) the function will always return the best alignment beginning position and cigar. When flag == 0, only the optimal and sub-optimal scores and the optimal alignment ending position will be returned.
+	 */
+	public static final int FLAG_INCLUDE_BEST_ALIGNMENT_POSITION_AND_CIGAR = 0x01;
+	/**
+	 * Do Striped Smith-Waterman alignment. Warning: No parameter checking is performed. Incorrect arguments are likely to crash the JVM.
+	 * 
+	 * @param read pointer to the query sequence; the query sequence needs to be numbers
+	 * @param flattenedMatrix pointer to the substitution matrix; mat needs to be corresponding to the read sequence
+	 * @param n the square root of the number of elements in mat (mat has n*n elements)
+	 * @param score_size estimated Smith-Waterman score; if your estimated best alignment score is surely < 255 please set 0; if
+					your estimated best alignment score >= 255, please set 1; if you don't know, please set 2
+	 * @param ref pointer to the target sequence; the target sequence needs to be numbers and corresponding to the mat parameter of function initprofile
+	 * @param gapOpen the absolute value of gap open penalty.
+	 * @param gapExtend the absolute value of gap extension penalty.
+	 * @param flag bitwise FLAG; (from high to low) bit 5: when setted as 1, the function will return the best alignment
+					beginning position; bit 6: when setted as 1, if (ref_end1 - ref_begin1 < filterd && read_end1 - read_begin1
+					< filterd), (whatever bit 5 is setted) the function will return the best alignment beginning position and
+					cigar; bit 7: when setted as 1, if the best alignment score >= filters, (whatever bit 5 is setted) the function
+  					will return the best alignment beginning position and cigar; bit 8: when setted as 1, (whatever bit 5, 6 or 7 is
+ 					setted) the function will always return the best alignment beginning position and cigar. When flag == 0, only
+					the optimal and sub-optimal scores and the optimal alignment ending position will be returned.
+						Note: Whatever the parameter flag is setted, this function will at least return the optimal and sub-optimal alignment score,
+						and the optimal alignment ending positions on target and query sequences. If both bit 6 and 7 of the flag are setted
+						while bit 8 is not, the function will return cigar only when both criteria are fulfilled. All returned positions are
+						0-based coordinate.
+	 * @param filterscore score filter: when bit 7 of flag is setted as 1 and bit 8 is setted as 0, filters will be used (Please check the
+ 					decription of the flag parameter for detailed usage.)
+	 * @param filterdistance distance filter: when bit 6 of flag is setted as 1 and bit 8 is setted as 0, filterd will be used (Please check
+					the decription of the flag parameter for detailed usage.)
+	 * @param maskLen The distance between the optimal and suboptimal alignment ending position >= maskLen. We suggest to use
+					readLen/2, if you don't have special concerns. Note: maskLen has to be >= 15, otherwise this function will NOT
+					return the suboptimal alignment information. Detailed description of maskLen: After locating the optimal
+					alignment ending position, the suboptimal alignment score can be heuristically found by checking the second
+					largest score in the array that contains the maximal score of each column of the SW matrix. In order to avoid
+					picking the scores that belong to the alignments sharing the partial best alignment, SSW C library masks the
+					reference loci nearby (mask length = maskLen) the best alignment ending position and locates the second largest
+					score from the unmasked elements.
+	 * @return Smith-Waterman alignment
+	 */
+	public static native Alignment align(byte[] read, byte[] flattenedMatrix, int n, int score_size, byte[] ref, int gapOpen, int gapExtend, int flag, short filterscore, int filterdistance, int maskLen);
+	/**
+	 * Performs striped Smith-Waterman alignment
+	 * 
+	 * @param read read sequence
+	 * @param ref reference sequence
+	 * @param matrix scoring matrix
+	 * @param gapOpen absolute value of gap open penalty
+	 * @param gapExtend absolute value of gap extension penalty
+	 * @param ignoreCase upper and lowercase sequence values are treated as the same value.
+	 * @return Smith-Waterman alignment
+	 */
+	public static Alignment align(byte[] read, byte[] ref, int[][] matrix, int gapOpen, int gapExtend, boolean ignoreCase) {
+		if (gapOpen < 0 || gapExtend < 0) throw new IllegalArgumentException("Gap open and extension penalties must be positive");
+		if (gapOpen >= 256 || gapExtend >= 256) throw new IllegalArgumentException("Gap open and extension penalties must fit into unsigned 8-bit integer");
+		//if (flag != flag & 0xFF) throw new IllegalArgumentException("Only lowest 8 bits of flag are meaningful");
+		int[] lookup = new int[257]; // lookup[256] is used as sentinal for number of unique bases/matrix size
+		java.util.Arrays.fill(lookup, -1);
+		lookup[256] = 0;
+		byte[] readNum = convertToNumeric(lookup, read, ignoreCase);
+		byte[] refNum = convertToNumeric(lookup, ref, ignoreCase);
+		byte[] flattenedMatrix = flatten(lookup, matrix);
+		int uniqueBases = lookup[256];
+		assert(flattenedMatrix.length == uniqueBases * uniqueBases);
+		assert(maxValue(readNum) < uniqueBases);
+		assert(maxValue(refNum) < uniqueBases);
+		Alignment alignment = align(
+				readNum, flattenedMatrix, uniqueBases, MAX_SCORE_UNSURE,
+				refNum, gapOpen, gapExtend, FLAG_INCLUDE_BEST_ALIGNMENT_POSITION_AND_CIGAR, (short)0, 0, Math.max(15, readNum.length / 2));
+		return alignment;
+	}
+	private static int maxValue(byte[] array) {
+		int max = Integer.MIN_VALUE;
+		for (int i = 0; i < array.length; i++) {
+			if (array[i] > max) {
+				max = array[i];
+			}
+		}
+		return max;
+	}
+	/**
+	 * Converts an ASCII sequence into numeric successive 0-based values 
+	 * @param lookup ASCII to numeric conversion lookup array
+	 * @param sequence ASCII sequence
+	 * @param ignoreCase treat upper and lowercase ASCII values as the same 
+	 * @return numeric sequence
+	 */
+	private static byte[] convertToNumeric(int[] lookup, byte[] sequence, boolean ignoreCase) {
+		byte[] numericSeq = new byte[sequence.length];
+		for (int i = 0; i < sequence.length; i++) {
+			int b = sequence[i];
+			if (ignoreCase) {
+				b = Character.toUpperCase(b);
+			}
+			if (lookup[b] == -1) {
+				lookup[b] = lookup[256]++;
+			}
+			numericSeq[i] = (byte)lookup[b];
+		}
+		return numericSeq;
+	}
+	/**
+	 * Generates a flattened ssw scoring matrix  
+	 * @param lookup ASCII to numeric conversion lookup array
+	 * @param matrix scoring matrix
+	 * @return flattened ssw numeric scoring matrix
+	 */
+	private static byte[] flatten(int[] lookup, int[][] matrix) {
+		int size = lookup[256];
+		byte[] flattened = new byte[size * size];
+		for (int i = 0; i < matrix.length; i++) {
+			int newi = lookup[i];
+			if (newi == -1) continue;
+			for (int j = 0; j < matrix[i].length; j++) {
+				int newj = lookup[j];
+				if (newj == -1) continue;
+				int score = matrix[i][j];
+				if (score < Byte.MIN_VALUE || score > Byte.MAX_VALUE) {
+					throw new IllegalArgumentException("Scoring matrix values must fit into signed 8-bit integer");
+				}
+				flattened[newi * size + newj] = (byte)score;
+			}
+		}
+		return flattened;
+	}
+}
diff --git a/src/ssw/Alignment.java b/src/ssw/Alignment.java
new file mode 100644
index 0000000..abc8240
--- /dev/null
+++ b/src/ssw/Alignment.java
@@ -0,0 +1,66 @@
+package ssw;
+
+/**
+ * Smith-Waterman alignment
+ * 
+ * @author Daniel Cameron
+ *
+ */
+public class Alignment {
+	/**
+	 * score1	the best alignment score
+	 */
+	public final short score1;
+	/**
+	 * sub-optimal alignment score
+	 */
+	public final short score2;
+	/**
+	 * 0-based best alignment beginning position on reference;	ref_begin1 = -1 when the best alignment beginning position is not available
+	 */
+	public final int ref_begin1;
+	/**
+	 * 0-based best alignment ending position on reference
+	 */
+	public final int ref_end1;
+	/**
+	 * 0-based best alignment beginning position on read; read_begin1 = -1 when the best alignment beginning
+						position is not available
+	 */
+	public final int read_begin1;
+	/**
+	 * 0-based best alignment ending position on read
+	 */
+	public final int read_end1;
+	/**
+	 * 0-based sub-optimal alignment ending position on read
+	 */
+	public final int ref_end2;
+	/**
+	 * best alignment cigar; stored the same as that in BAM format
+	 */
+	public final String cigar;
+	public Alignment(
+			short score1,
+			short score2,
+			int ref_begin1,
+			int ref_end1,
+			int	read_begin1,
+			int read_end1,
+			int ref_end2,
+			String cigar) {
+		this.score1 = score1;
+		this.score2 = score2;
+		this.ref_begin1 = ref_begin1;
+		this.ref_end1 = ref_end1;
+		this.read_begin1 = read_begin1;
+		this.read_end1 = read_end1;
+		this.ref_end2 = ref_end2;
+		this.cigar = cigar;
+	}
+	@Override
+	public String toString() {
+		return String.format("score1=%d score2=%d, ref=%d,%d read=%d,%d refend2=%d, cigar=%s",
+				score1, score2, ref_begin1, ref_end1, read_begin1, read_end1, ref_end2, cigar);
+	}
+}
diff --git a/src/ssw/Example.java b/src/ssw/Example.java
new file mode 100644
index 0000000..8bec9bb
--- /dev/null
+++ b/src/ssw/Example.java
@@ -0,0 +1,39 @@
+package ssw;
+
+/**
+ * Example code for SSW JNI wrapper
+ * 
+ * @author Daniel Cameron
+ *
+ */
+public class Example {
+	public static void main(String[] args) {
+		try {
+			System.loadLibrary("sswjni");
+		} catch (java.lang.UnsatisfiedLinkError e) {
+			System.out.println(String.format("Cannot find libsswjni.so. Has the library been built and LD_LIBRARY_PATH or -Djava.library.path set appropriately?\n%s", e));
+			throw e;
+		}
+		int[][] score = new int[128][128];
+		for (int i = 0; i < 128; i++) {
+			for (int j = 0; j < 128; j++) {
+				if (i == j) score[i][j] = 2;
+				else score[i][j] = -2;
+			}
+		}
+		System.out.println("Aligning nucleotides");
+		Alignment aln = Aligner.align("CTGAGCCGGTAAATC".getBytes(), "CAGCCTTTCTGACCCGGAAATCAAAATAGGCACAACAAA".getBytes(), score, 3, 1, true);
+		if (aln == null) {
+			throw new RuntimeException();
+		}
+		System.out.print(String.format("score1=%d ", aln.score1));
+		System.out.print(String.format("score2=%d ", aln.score2));
+		System.out.print(String.format("ref_begin1=%d ", aln.ref_begin1));
+		System.out.print(String.format("ref_end1=%d ", aln.ref_end1));
+		System.out.print(String.format("read_begin1=%d ", aln.read_begin1));
+		System.out.print(String.format("read_end1=%d ", aln.read_end1));
+		System.out.print(String.format("ref_end2=%d ", aln.ref_end2));
+		System.out.print(String.format("cigar=%s ", aln.cigar));
+		System.out.println();
+	}
+}
diff --git a/src/ssw_cpp.cpp b/src/ssw_cpp.cpp
new file mode 100644
index 0000000..7bd3beb
--- /dev/null
+++ b/src/ssw_cpp.cpp
@@ -0,0 +1,477 @@
+#include "ssw_cpp.h"
+#include "ssw.h"
+
+#include <sstream>
+
+namespace {
+
+static const int8_t kBaseTranslation[128] = {
+    4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+    4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+    4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+    4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+  //   A     C            G
+    4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
+  //             T
+    4, 4, 4, 4,  3, 0, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4,
+  //   a     c            g
+    4, 0, 4, 1,  4, 4, 4, 2,  4, 4, 4, 4,  4, 4, 4, 4,
+  //             t
+    4, 4, 4, 4,  3, 0, 4, 4,  4, 4, 4, 4,  4, 4, 4, 4
+};
+
+void BuildSwScoreMatrix(const uint8_t& match_score,
+                        const uint8_t& mismatch_penalty,
+			int8_t* matrix) {
+
+  // The score matrix looks like
+  //                 // A,  C,  G,  T,  N
+  //  score_matrix_ = { 2, -2, -2, -2, -2, // A
+  //                   -2,  2, -2, -2, -2, // C
+  //                   -2, -2,  2, -2, -2, // G
+  //                   -2, -2, -2,  2, -2, // T
+  //                   -2, -2, -2, -2, -2};// N
+
+  int id = 0;
+  for (int i = 0; i < 4; ++i) {
+    for (int j = 0; j < 4; ++j) {
+      matrix[id] = ((i == j) ? match_score : static_cast<int8_t>(-mismatch_penalty));
+      ++id;
+    }
+    matrix[id] = static_cast<int8_t>(-mismatch_penalty); // For N
+    ++id;
+  }
+
+  for (int i = 0; i < 5; ++i)
+    matrix[id++] = static_cast<int8_t>(-mismatch_penalty); // For N
+
+}
+
+void ConvertAlignment(const s_align& s_al,
+                      const int& query_len,
+                      StripedSmithWaterman::Alignment* al) {
+  al->sw_score           = s_al.score1;
+  al->sw_score_next_best = s_al.score2;
+  al->ref_begin          = s_al.ref_begin1;
+  al->ref_end            = s_al.ref_end1;
+  al->query_begin        = s_al.read_begin1;
+  al->query_end          = s_al.read_end1;
+  al->ref_end_next_best  = s_al.ref_end2;
+
+  al->cigar.clear();
+  al->cigar_string.clear();
+
+  if (s_al.cigarLen > 0) {
+    std::ostringstream cigar_string;
+    if (al->query_begin > 0) {
+      uint32_t cigar = to_cigar_int(al->query_begin, 'S');
+      al->cigar.push_back(cigar);
+      cigar_string << al->query_begin << 'S';
+    }
+
+    for (int i = 0; i < s_al.cigarLen; ++i) {
+      al->cigar.push_back(s_al.cigar[i]);
+      cigar_string << cigar_int_to_len(s_al.cigar[i]) << cigar_int_to_op(s_al.cigar[i]);
+    }
+
+    int end = query_len - al->query_end - 1;
+    if (end > 0) {
+      uint32_t cigar = to_cigar_int(end, 'S');
+      al->cigar.push_back(cigar);
+      cigar_string << end << 'S';
+    }
+
+    al->cigar_string = cigar_string.str();
+  } // end if
+}
+
+// @Function:
+//     Calculate the length of the previous cigar operator
+//     and store it in new_cigar and new_cigar_string.
+//     Clean up in_M (false), in_X (false), length_M (0), and length_X(0).
+void CleanPreviousMOperator(
+    bool* in_M,
+    bool* in_X,
+    uint32_t* length_M,
+    uint32_t* length_X,
+    std::vector<uint32_t>* new_cigar,
+    std::ostringstream* new_cigar_string) {
+  if (*in_M) {
+    uint32_t match = to_cigar_int(*length_M, '=');
+    new_cigar->push_back(match);
+    (*new_cigar_string) << *length_M << '=';
+  } else if (*in_X){ //in_X
+    uint32_t match = to_cigar_int(*length_X, 'X');
+    new_cigar->push_back(match);
+    (*new_cigar_string) << *length_X << 'X';
+  }
+
+  // Clean up
+  *in_M = false;
+  *in_X = false;
+  *length_M = 0;
+  *length_X = 0;
+}
+
+// @Function:
+//     1. Calculate the number of mismatches.
+//     2. Modify the cigar string:
+//         differentiate matches (M) and mismatches(X).
+//         Note that SSW does not differentiate matches and mismatches.
+// @Return:
+//     The number of mismatches.
+int CalculateNumberMismatch(
+    StripedSmithWaterman::Alignment* al,
+    int8_t const *ref,
+    int8_t const *query,
+    const int& query_len) {
+
+  ref   += al->ref_begin;
+  query += al->query_begin;
+  int mismatch_length = 0;
+
+  std::vector<uint32_t> new_cigar;
+  std::ostringstream new_cigar_string;
+
+  if (al->query_begin > 0) {
+    uint32_t cigar = to_cigar_int(al->query_begin, 'S');
+    new_cigar.push_back(cigar);
+    new_cigar_string << al->query_begin << 'S';
+  }
+
+  bool in_M = false; // the previous is match
+  bool in_X = false; // the previous is mismatch
+  uint32_t length_M = 0;
+  uint32_t length_X = 0;
+
+  for (unsigned int i = 0; i < al->cigar.size(); ++i) {
+    char op = cigar_int_to_op(al->cigar[i]);
+    uint32_t length = cigar_int_to_len(al->cigar[i]);
+    if (op == 'M') {
+      for (uint32_t j = 0; j < length; ++j) {
+	if (*ref != *query) {
+	  ++mismatch_length;
+          if (in_M) { // the previous is match; however the current one is mismatche
+	    uint32_t match = to_cigar_int(length_M, '=');
+	    new_cigar.push_back(match);
+	    new_cigar_string << length_M << '=';
+	  }
+	  length_M = 0;
+	  ++length_X;
+	  in_M = false;
+	  in_X = true;
+	} else { // *ref == *query
+	  if (in_X) { // the previous is mismatch; however the current one is matche
+	    uint32_t match = to_cigar_int(length_X, 'X');
+	    new_cigar.push_back(match);
+	    new_cigar_string << length_X << 'X';
+	  }
+	  ++length_M;
+	  length_X = 0;
+	  in_M = true;
+	  in_X = false;
+	} // end of if (*ref != *query)
+	++ref;
+	++query;
+      }
+    } else if (op == 'I') {
+      query += length;
+      mismatch_length += length;
+      CleanPreviousMOperator(&in_M, &in_X, &length_M, &length_X, &new_cigar, &new_cigar_string);
+      new_cigar.push_back(al->cigar[i]);
+      new_cigar_string << length << 'I';
+    } else if (op == 'D') {
+      ref += length;
+      mismatch_length += length;
+      CleanPreviousMOperator(&in_M, &in_X, &length_M, &length_X, &new_cigar, &new_cigar_string);
+      new_cigar.push_back(al->cigar[i]);
+      new_cigar_string << length << 'D';
+    }
+  }
+
+  CleanPreviousMOperator(&in_M, &in_X, &length_M, &length_X, &new_cigar, &new_cigar_string);
+
+  int end = query_len - al->query_end - 1;
+  if (end > 0) {
+    uint32_t cigar = to_cigar_int(end, 'S');
+    new_cigar.push_back(cigar);
+    new_cigar_string << end << 'S';
+  }
+
+  al->cigar_string.clear();
+  al->cigar.clear();
+  al->cigar_string = new_cigar_string.str();
+  al->cigar = new_cigar;
+
+  return mismatch_length;
+}
+
+void SetFlag(const StripedSmithWaterman::Filter& filter, uint8_t* flag) {
+  if (filter.report_begin_position) *flag |= 0x08;
+  if (filter.report_cigar) *flag |= 0x0f;
+}
+
+// http://www.cplusplus.com/faq/sequences/arrays/sizeof-array/#cpp
+template <typename T, size_t N>
+inline size_t SizeOfArray( const T(&)[ N ] )
+{
+  return N;
+}
+
+} // namespace
+
+
+
+namespace StripedSmithWaterman {
+
+Aligner::Aligner(void)
+    : score_matrix_(NULL)
+    , score_matrix_size_(5)
+    , translation_matrix_(NULL)
+    , match_score_(2)
+    , mismatch_penalty_(2)
+    , gap_opening_penalty_(3)
+    , gap_extending_penalty_(1)
+    , translated_reference_(NULL)
+    , reference_length_(0)
+{
+  BuildDefaultMatrix();
+}
+
+Aligner::Aligner(
+    const uint8_t& match_score,
+    const uint8_t& mismatch_penalty,
+    const uint8_t& gap_opening_penalty,
+    const uint8_t& gap_extending_penalty)
+
+    : score_matrix_(NULL)
+    , score_matrix_size_(5)
+    , translation_matrix_(NULL)
+    , match_score_(match_score)
+    , mismatch_penalty_(mismatch_penalty)
+    , gap_opening_penalty_(gap_opening_penalty)
+    , gap_extending_penalty_(gap_extending_penalty)
+    , translated_reference_(NULL)
+    , reference_length_(0)
+{
+  BuildDefaultMatrix();
+}
+
+Aligner::Aligner(const int8_t* score_matrix,
+                 const int&    score_matrix_size,
+	         const int8_t* translation_matrix,
+		 const int&    translation_matrix_size)
+
+    : score_matrix_(NULL)
+    , score_matrix_size_(score_matrix_size)
+    , translation_matrix_(NULL)
+    , match_score_(2)
+    , mismatch_penalty_(2)
+    , gap_opening_penalty_(3)
+    , gap_extending_penalty_(1)
+    , translated_reference_(NULL)
+    , reference_length_(0)
+{
+  score_matrix_ = new int8_t[score_matrix_size_ * score_matrix_size_];
+  memcpy(score_matrix_, score_matrix, sizeof(int8_t) * score_matrix_size_ * score_matrix_size_);
+  translation_matrix_ = new int8_t[translation_matrix_size];
+  memcpy(translation_matrix_, translation_matrix, sizeof(int8_t) * translation_matrix_size);
+}
+
+
+Aligner::~Aligner(void){
+  Clear();
+}
+
+int Aligner::SetReferenceSequence(const char* seq, const int& length) {
+
+  int len = 0;
+  if (translation_matrix_) {
+    // calculate the valid length
+    //int calculated_ref_length = static_cast<int>(strlen(seq));
+    //int valid_length = (calculated_ref_length > length)
+    //                   ? length : calculated_ref_length;
+    int valid_length = length;
+    // delete the current buffer
+    CleanReferenceSequence();
+    // allocate a new buffer
+    translated_reference_ = new int8_t[valid_length];
+
+    len = TranslateBase(seq, valid_length, translated_reference_);
+  } else {
+    // nothing
+  }
+
+  reference_length_ = len;
+  return len;
+
+
+}
+
+int Aligner::TranslateBase(const char* bases, const int& length,
+    int8_t* translated) const {
+
+  const char* ptr = bases;
+  int len = 0;
+  for (int i = 0; i < length; ++i) {
+    translated[i] = translation_matrix_[(int) *ptr];
+    ++ptr;
+    ++len;
+  }
+
+  return len;
+}
+
+
+bool Aligner::Align(const char* query, const Filter& filter,
+                    Alignment* alignment) const
+{
+  if (!translation_matrix_) return false;
+  if (reference_length_ == 0) return false;
+
+  int query_len = strlen(query);
+  if (query_len == 0) return false;
+  int8_t* translated_query = new int8_t[query_len];
+  TranslateBase(query, query_len, translated_query);
+
+  const int8_t score_size = 2;
+  s_profile* profile = ssw_init(translated_query, query_len, score_matrix_,
+                                score_matrix_size_, score_size);
+
+  uint8_t flag = 0;
+  SetFlag(filter, &flag);
+  s_align* s_al = ssw_align(profile, translated_reference_, reference_length_,
+                                 static_cast<int>(gap_opening_penalty_),
+				 static_cast<int>(gap_extending_penalty_),
+				 flag, filter.score_filter, filter.distance_filter, query_len);
+
+  alignment->Clear();
+  ConvertAlignment(*s_al, query_len, alignment);
+  alignment->mismatches = CalculateNumberMismatch(&*alignment, translated_reference_, translated_query, query_len);
+
+
+  // Free memory
+  delete [] translated_query;
+  align_destroy(s_al);
+  init_destroy(profile);
+
+  return true;
+}
+
+
+bool Aligner::Align(const char* query, const char* ref, const int& ref_len,
+                    const Filter& filter, Alignment* alignment) const
+{
+  if (!translation_matrix_) return false;
+
+  int query_len = strlen(query);
+  if (query_len == 0) return false;
+  int8_t* translated_query = new int8_t[query_len];
+  TranslateBase(query, query_len, translated_query);
+
+  // calculate the valid length
+  //int calculated_ref_length = static_cast<int>(strlen(ref));
+  //int valid_ref_len = (calculated_ref_length > ref_len)
+  //                    ? ref_len : calculated_ref_length;
+  int valid_ref_len = ref_len;
+  int8_t* translated_ref = new int8_t[valid_ref_len];
+  TranslateBase(ref, valid_ref_len, translated_ref);
+
+
+  const int8_t score_size = 2;
+  s_profile* profile = ssw_init(translated_query, query_len, score_matrix_,
+                                score_matrix_size_, score_size);
+
+  uint8_t flag = 0;
+  SetFlag(filter, &flag);
+  s_align* s_al = ssw_align(profile, translated_ref, valid_ref_len,
+                                 static_cast<int>(gap_opening_penalty_),
+				 static_cast<int>(gap_extending_penalty_),
+				 flag, filter.score_filter, filter.distance_filter, query_len);
+
+  alignment->Clear();
+  ConvertAlignment(*s_al, query_len, alignment);
+  alignment->mismatches = CalculateNumberMismatch(&*alignment, translated_ref, translated_query, query_len);
+
+  // Free memory
+  delete [] translated_query;
+  delete [] translated_ref;
+  align_destroy(s_al);
+  init_destroy(profile);
+
+  return true;
+}
+
+void Aligner::Clear(void) {
+  ClearMatrices();
+  CleanReferenceSequence();
+}
+
+void Aligner::SetAllDefault(void) {
+  score_matrix_size_     = 5;
+  match_score_           = 2;
+  mismatch_penalty_      = 2;
+  gap_opening_penalty_   = 3;
+  gap_extending_penalty_ = 1;
+  reference_length_      = 0;
+}
+
+bool Aligner::ReBuild(void) {
+  if (translation_matrix_) return false;
+
+  SetAllDefault();
+  BuildDefaultMatrix();
+
+  return true;
+}
+
+bool Aligner::ReBuild(
+    const uint8_t& match_score,
+    const uint8_t& mismatch_penalty,
+    const uint8_t& gap_opening_penalty,
+    const uint8_t& gap_extending_penalty) {
+  if (translation_matrix_) return false;
+
+  SetAllDefault();
+
+  match_score_           = match_score;
+  mismatch_penalty_      = mismatch_penalty;
+  gap_opening_penalty_   = gap_opening_penalty;
+  gap_extending_penalty_ = gap_extending_penalty;
+
+  BuildDefaultMatrix();
+
+  return true;
+}
+
+bool Aligner::ReBuild(
+    const int8_t* score_matrix,
+    const int&    score_matrix_size,
+    const int8_t* translation_matrix,
+    const int&    translation_matrix_size) {
+
+  ClearMatrices();
+  score_matrix_ = new int8_t[score_matrix_size_ * score_matrix_size_];
+  memcpy(score_matrix_, score_matrix, sizeof(int8_t) * score_matrix_size_ * score_matrix_size_);
+  translation_matrix_ = new int8_t[translation_matrix_size];
+  memcpy(translation_matrix_, translation_matrix, sizeof(int8_t) * translation_matrix_size);
+
+  return true;
+}
+
+void Aligner::BuildDefaultMatrix(void) {
+  ClearMatrices();
+  score_matrix_ = new int8_t[score_matrix_size_ * score_matrix_size_];
+  BuildSwScoreMatrix(match_score_, mismatch_penalty_, score_matrix_);
+  translation_matrix_ = new int8_t[SizeOfArray(kBaseTranslation)];
+  memcpy(translation_matrix_, kBaseTranslation, sizeof(int8_t) * SizeOfArray(kBaseTranslation));
+}
+
+void Aligner::ClearMatrices(void) {
+  delete [] score_matrix_;
+  score_matrix_ = NULL;
+
+  delete [] translation_matrix_;
+  translation_matrix_ = NULL;
+}
+} // namespace StripedSmithWaterman
diff --git a/src/ssw_cpp.h b/src/ssw_cpp.h
new file mode 100644
index 0000000..cdcf717
--- /dev/null
+++ b/src/ssw_cpp.h
@@ -0,0 +1,219 @@
+#ifndef COMPLETE_STRIPED_SMITH_WATERMAN_CPP_H_
+#define COMPLETE_STRIPED_SMITH_WATERMAN_CPP_H_
+
+#include <stdint.h>
+#include <string>
+#include <vector>
+
+namespace StripedSmithWaterman {
+
+struct Alignment {
+  uint16_t sw_score;           // The best alignment score
+  uint16_t sw_score_next_best; // The next best alignment score
+  int32_t  ref_begin;          // Reference begin position of the best alignment
+  int32_t  ref_end;            // Reference end position of the best alignment
+  int32_t  query_begin;        // Query begin position of the best alignment
+  int32_t  query_end;          // Query end position of the best alignment
+  int32_t  ref_end_next_best;  // Reference end position of the next best alignment
+  int32_t  mismatches;         // Number of mismatches of the alignment
+  std::string cigar_string;    // Cigar string of the best alignment
+  std::vector<uint32_t> cigar; // Cigar stored in the BAM format
+                               //   high 28 bits: length
+			       //   low 4 bits: M/I/D/S/X (0/1/2/4/8);
+  void Clear() {
+    sw_score           = 0;
+    sw_score_next_best = 0;
+    ref_begin          = 0;
+    ref_end            = 0;
+    query_begin        = 0;
+    query_end          = 0;
+    ref_end_next_best  = 0;
+    mismatches         = 0;
+    cigar_string.clear();
+    cigar.clear();
+  };
+};
+
+struct Filter {
+  // NOTE: No matter the filter, those five fields of Alignment will be given anyway.
+  //       sw_score; sw_score_next_best; ref_end; query_end; ref_end_next_best.
+  // NOTE: Only need score of alignments, please set 'report_begin_position'
+  //       and 'report_cigar' false.
+
+  bool report_begin_position;    // Give ref_begin and query_begin.
+                                 //   If it is not set, ref_begin and query_begin are -1.
+  bool report_cigar;             // Give cigar_string and cigar.
+                                 //   report_begin_position is automatically TRUE.
+
+  // When *report_cigar* is true and alignment passes these two filters,
+  //   cigar_string and cigar will be given.
+  uint16_t score_filter;         // score >= score_filter
+  uint16_t distance_filter;      // ((ref_end - ref_begin) < distance_filter) &&
+                                 // ((query_end - read_begin) < distance_filter)
+
+  Filter()
+    : report_begin_position(true)
+    , report_cigar(true)
+    , score_filter(0)
+    , distance_filter(32767)
+  {};
+
+  Filter(const bool& pos, const bool& cigar, const uint16_t& score, const uint16_t& dis)
+    : report_begin_position(pos)
+    , report_cigar(cigar)
+    , score_filter(score)
+    , distance_filter(dis)
+    {};
+};
+
+class Aligner {
+ public:
+  // =========
+  // @function Construct an Aligner on default values.
+  //             The function will build the {A.C,G,T,N} aligner.
+  //             If you target for other character aligners, then please
+  //             use the other constructor and pass the corresponding matrix in.
+  // =========
+  Aligner(void);
+
+  // =========
+  // @function Construct an Aligner by assigning scores.
+  //             The function will build the {A.C,G,T,N} aligner.
+  //             If you target for other character aligners, then please
+  //             use the other constructor and pass the corresponding matrix in.
+  // =========
+  Aligner(const uint8_t& match_score,
+          const uint8_t& mismatch_penalty,
+	  const uint8_t& gap_opening_penalty,
+	  const uint8_t& gap_extending_penalty);
+
+  // =========
+  // @function Construct an Aligner by the specific matrixs.
+  // =========
+  Aligner(const int8_t* score_matrix,
+          const int&    score_matrix_size,
+          const int8_t* translation_matrix,
+	  const int&    translation_matrix_size);
+
+  ~Aligner(void);
+
+  // =========
+  // @function Build the reference sequence and thus make
+  //             Align(const char* query, s_align* alignment) function;
+  //             otherwise the reference should be given when aligning.
+  //           [NOTICE] If there exists a sequence, that one will be deleted
+  //                    and replaced.
+  // @param    seq    The reference bases;
+  //                  [NOTICE] It is not necessary null terminated.
+  // @param    length The length of bases will be be built.
+  // @return   The length of the built bases.
+  // =========
+  int SetReferenceSequence(const char* seq, const int& length);
+
+  void CleanReferenceSequence(void);
+
+  // =========
+  // @function Set penalties for opening and extending gaps
+  //           [NOTICE] The defaults are 3 and 1 respectively.
+  // =========
+  void SetGapPenalty(const uint8_t& opening, const uint8_t& extending) {
+    gap_opening_penalty_ = opening;
+    gap_extending_penalty_ = extending;
+  };
+
+  // =========
+  // @function Align the query againt the reference that is set by
+  //             SetReferenceSequence.
+  // @param    query     The query sequence.
+  // @param    filter    The filter for the alignment.
+  // @param    alignment The container contains the result.
+  // @return   True: succeed; false: fail.
+  // =========
+  bool Align(const char* query, const Filter& filter, Alignment* alignment) const;
+
+  // =========
+  // @function Align the query againt the reference.
+  //           [NOTICE] The reference won't replace the reference
+  //                      set by SetReferenceSequence.
+  // @param    query     The query sequence.
+  // @param    ref       The reference sequence.
+  //                     [NOTICE] It is not necessary null terminated.
+  // @param    ref_len   The length of the reference sequence.
+  // @param    filter    The filter for the alignment.
+  // @param    alignment The container contains the result.
+  // @return   True: succeed; false: fail.
+  // =========
+  bool Align(const char* query, const char* ref, const int& ref_len,
+             const Filter& filter, Alignment* alignment) const;
+
+  // @function Clear up all containers and thus the aligner is disabled.
+  //             To rebuild the aligner please use Build functions.
+  void Clear(void);
+
+  // =========
+  // @function Rebuild the aligner's ability on default values.
+  //           [NOTICE] If the aligner is not cleaned, rebuilding will fail.
+  // @return   True: succeed; false: fail.
+  // =========
+  bool ReBuild(void);
+
+  // =========
+  // @function Rebuild the aligner's ability by the specific matrixs.
+  //           [NOTICE] If the aligner is not cleaned, rebuilding will fail.
+  // @return   True: succeed; false: fail.
+  // =========
+  bool ReBuild(
+          const uint8_t& match_score,
+          const uint8_t& mismatch_penalty,
+	  const uint8_t& gap_opening_penalty,
+	  const uint8_t& gap_extending_penalty);
+
+  // =========
+  // @function Construct an Aligner by the specific matrixs.
+  //           [NOTICE] If the aligner is not cleaned, rebuilding will fail.
+  // @return   True: succeed; false: fail.
+  // =========
+  bool ReBuild(
+          const int8_t* score_matrix,
+          const int&    score_matrix_size,
+          const int8_t* translation_matrix,
+	  const int&    translation_matrix_size);
+
+ private:
+  int8_t* score_matrix_;
+  int     score_matrix_size_;
+  int8_t* translation_matrix_;
+
+  uint8_t match_score_;           // default: 2
+  uint8_t mismatch_penalty_;      // default: 2
+  uint8_t gap_opening_penalty_;   // default: 3
+  uint8_t gap_extending_penalty_; // default: 1
+
+  int8_t* translated_reference_;
+  int32_t reference_length_;
+
+  int TranslateBase(const char* bases, const int& length, int8_t* translated) const;
+  void SetAllDefault(void);
+  void BuildDefaultMatrix(void);
+  void ClearMatrices(void);
+
+  Aligner& operator= (const Aligner&);
+  Aligner (const Aligner&);
+}; // class Aligner
+
+
+// ================
+// inline functions
+// ================
+inline void Aligner::CleanReferenceSequence(void) {
+  if (reference_length_ == 0) return;
+
+  // delete the current buffer
+  if (reference_length_ > 1) delete [] translated_reference_;
+  else delete translated_reference_;
+
+  reference_length_ = 0;
+}
+} // namespace StripedSmithWaterman
+
+#endif // COMPLETE_STRIPED_SMITH_WATERMAN_CPP_H_
diff --git a/src/ssw_lib.py b/src/ssw_lib.py
new file mode 100644
index 0000000..64d6060
--- /dev/null
+++ b/src/ssw_lib.py
@@ -0,0 +1,226 @@
+#!/usr/bin/env python
+"""
+Simple python wrapper for SSW library
+Please put the path of libssw.so into LD_LIBRARY_PATH or pass it explicitly as a parameter
+By Yongan Zhao (March 2016)
+"""
+
+import sys
+import os.path as op
+import ctypes as ct
+
+
+
+lBlosum50 = [
+	#  A   R   N   D   C   Q   E   G   H   I   L   K   M   F   P   S   T   W   Y   V   B   Z   X   *
+     	5, -2, -1, -2, -1, -1, -1,  0, -2, -1, -2, -1, -1, -3, -1,  1,  0, -3, -2,  0, -2, -1, -1, -5,	# A
+       -2,  7, -1, -2, -4,  1,  0, -3,  0, -4, -3,  3, -2, -3, -3, -1, -1, -3, -1, -3, -1,  0, -1, -5,	# R
+       -1, -1,  7,  2, -2,  0,  0,  0,  1, -3, -4,  0, -2, -4, -2,  1,  0, -4, -2, -3,  5,  0, -1, -5,	# N
+       -2, -2,  2,  8, -4,  0,  2, -1, -1, -4, -4, -1, -4, -5, -1,  0, -1, -5, -3, -4,  6,  1, -1, -5,	# D
+       -1, -4, -2, -4, 13, -3, -3, -3, -3, -2, -2, -3, -2, -2, -4, -1, -1, -5, -3, -1, -3, -3, -1, -5,	# C
+       -1,  1,  0,  0, -3,  7,  2, -2,  1, -3, -2,  2,  0, -4, -1,  0, -1, -1, -1, -3,  0,  4, -1, -5,	# Q
+       -1,  0,  0,  2, -3,  2,  6, -3,  0, -4, -3,  1, -2, -3, -1, -1, -1, -3, -2, -3,  1,  5, -1, -5,	# E
+     	0, -3,  0, -1, -3, -2, -3,  8, -2, -4, -4, -2, -3, -4, -2,  0, -2, -3, -3, -4, -1, -2, -1, -5,	# G
+       -2,  0,  1, -1, -3,  1,  0, -2, 10, -4, -3,  0, -1, -1, -2, -1, -2, -3,  2, -4,  0,  0, -1, -5,	# H
+       -1, -4, -3, -4, -2, -3, -4, -4, -4,  5,  2, -3,  2,  0, -3, -3, -1, -3, -1,  4, -4, -3, -1, -5,	# I
+       -2, -3, -4, -4, -2, -2, -3, -4, -3,  2,  5, -3,  3,  1, -4, -3, -1, -2, -1,  1, -4, -3, -1, -5,	# L
+       -1,  3,  0, -1, -3,  2,  1, -2,  0, -3, -3,  6, -2, -4, -1,  0, -1, -3, -2, -3,  0,  1, -1, -5,	# K
+       -1, -2, -2, -4, -2,  0, -2, -3, -1,  2,  3, -2,  7,  0, -3, -2, -1, -1,  0,  1, -3, -1, -1, -5,	# M
+       -3, -3, -4, -5, -2, -4, -3, -4, -1,  0,  1, -4,  0,  8, -4, -3, -2,  1,  4, -1, -4, -4, -1, -5,	# F
+       -1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -4, -1, -3, -4, 10, -1, -1, -4, -3, -3, -2, -1, -1, -5,	# P
+     	1, -1,  1,  0, -1,  0, -1,  0, -1, -3, -3,  0, -2, -3, -1,  5,  2, -4, -2, -2,  0,  0, -1, -5,	# S
+    	0, -1,  0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1,  2,  5, -3, -2,  0,  0, -1, -1, -5, 	# T
+       -3, -3, -4, -5, -5, -1, -3, -3, -3, -3, -2, -3, -1,  1, -4, -4, -3, 15,  2, -3, -5, -2, -1, -5, 	# W
+       -2, -1, -2, -3, -3, -1, -2, -3,  2, -1, -1, -2,  0,  4, -3, -2, -2,  2,  8, -1, -3, -2, -1, -5, 	# Y
+     	0, -3, -3, -4, -1, -3, -3, -4, -4,  4,  1, -3,  1, -1, -3, -2,  0, -3, -1,  5, -3, -3, -1, -5, 	# V
+       -2, -1,  5,  6, -3,  0,  1, -1,  0, -4, -4,  0, -3, -4, -2,  0,  0, -5, -3, -3,  6,  1, -1, -5, 	# B
+       -1,  0,  0,  1, -3,  4,  5, -2,  0, -3, -3,  1, -1, -4, -1,  0, -1, -2, -2, -3,  1,  5, -1, -5, 	# Z
+       -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -5, 	# X
+       -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5,  1 	# *
+       ]
+
+
+
+class CAlignRes(ct.Structure):
+    """
+    @typedef	structure of the alignment result
+    @field	nScore	the best alignment score
+    @field	nScore2	sub-optimal alignment score
+    @field	nRefBeg	0-based best alignment beginning position on reference;	ref_begin1 = -1 when the best alignment beginning
+                                            position is not available
+    @field	nRefEnd	0-based best alignment ending position on reference
+    @field	nQryBeg	0-based best alignment beginning position on read; read_begin1 = -1 when the best alignment beginning
+                                            position is not available
+    @field	nQryEnd	0-based best alignment ending position on read
+    @field	nRefEnd2	0-based sub-optimal alignment ending position on read
+    @field	sCigar	best alignment cigar; stored the same as that in BAM format, high 28 bits: length, low 4 bits: M/I/D (0/1/2);
+                                    cigar = 0 when the best alignment path is not available
+    @field	nCigarLen	length of the cigar string; cigarLen = 0 when the best alignment path is not available
+    """
+    _fields_ = [('nScore', ct.c_uint16), 
+                ('nScore2', ct.c_uint16), 
+                ('nRefBeg', ct.c_int32), 
+                ('nRefEnd', ct.c_int32), 
+                ('nQryBeg', ct.c_int32), 
+                ('nQryEnd', ct.c_int32), 
+                ('nRefEnd2', ct.c_int32), 
+                ('sCigar', ct.POINTER(ct.c_uint32)), 
+                ('nCigarLen', ct.c_int32)] 
+
+
+
+class CProfile(ct.Structure):
+    """
+    @typedef	structure of the query profile
+    @field	pByte	byte array for profile
+    @field	pWord	word array for profile
+    @field	pRead	number array for read
+    @field	pMat	score matrix
+    @field	nReadLen	read length
+    @field	nN	edge length of score matrix
+    @field	nBias	bias
+    """
+    _fields_ = [('pByte', ct.POINTER(ct.c_int32)),
+                ('pWord', ct.POINTER(ct.c_int32)),
+                ('pRead', ct.POINTER(ct.c_int8)),
+                ('pMat', ct.POINTER(ct.c_int8)),
+                ('nReadLen', ct.c_int32),
+                ('nN', ct.c_int32),
+                ('nBias', ct.c_uint8)]
+
+
+
+class CSsw(object):
+    """
+    A class for libssw
+    """
+    def __init__(self, sLibPath):
+        """
+        init all para
+        @para   sLibpath    argparse object
+        """
+# load libssw
+        sLibName = 'libssw.so'
+        if not sLibPath:
+# user doesn't give the path explicitly
+            if not op.exists(op.join(sLibPath, sLibName)):
+                print >> sys.stderr, 'libssw.so does not exist in the input path'
+                sys.exit()
+            self.ssw = ct.cdll.LoadLibrary(op.join(sLibPath,sLibName))
+        else:
+# otherwise just search in PATH
+            bFound = False
+            for s in sys.path:
+                if op.exists(op.join(s,sLibName)):
+                    bFound = True
+                    self.ssw = ct.cdll.LoadLibrary(op.join(s,sLibName))
+            if bFound == False:
+                print >> sys.stderr, 'libssw.so does not exist in PATH'
+                sys.exit()
+
+# init ssw_init
+        """
+	@function	Create the query profile using the query sequence.
+	@param	read	pointer to the query sequence; the query sequence needs to be numbers
+	@param	readLen	length of the query sequence
+	@param	mat	pointer to the substitution matrix; mat needs to be corresponding to the read sequence
+	@param	n	the square root of the number of elements in mat (mat has n*n elements)
+	@param	score_size	estimated Smith-Waterman score; if your estimated best alignment score is surely < 255 please set 0; if
+						your estimated best alignment score >= 255, please set 1; if you don't know, please set 2
+	@return	pointer to the query profile structure
+	@note	example for parameter read and mat:
+			If the query sequence is: ACGTATC, the sequence that read points to can be: 1234142
+			Then if the penalty for match is 2 and for mismatch is -2, the substitution matrix of parameter mat will be:
+			//A  C  G  T
+			  2 -2 -2 -2 //A
+			 -2  2 -2 -2 //C
+			 -2 -2  2 -2 //G
+			 -2 -2 -2  2 //T
+			mat is the pointer to the array {2, -2, -2, -2, -2, 2, -2, -2, -2, -2, 2, -2, -2, -2, -2, 2}
+        """
+        self.ssw_init = self.ssw.ssw_init
+        self.ssw_init.argtypes = [ct.POINTER(ct.c_int8), ct.c_int32, ct.POINTER(ct.c_int8), ct.c_int32, ct.c_int8]
+        self.ssw_init.restype = ct.POINTER(CProfile)
+# init init_destroy
+        """
+	@function	Release the memory allocated by function ssw_init.
+	@param	p	pointer to the query profile structure
+        """
+        self.init_destroy = self.ssw.init_destroy
+        self.init_destroy.argtypes = [ct.POINTER(CProfile)]
+        self.init_destroy.restype = None
+# init ssw_align
+        """
+!	@function	Do Striped Smith-Waterman alignment.
+	@param	prof	pointer to the query profile structure
+	@param	ref	pointer to the target sequence; the target sequence needs to be numbers and corresponding to the mat parameter of
+				function ssw_init
+	@param	refLen	length of the target sequence
+	@param	weight_gapO	the absolute value of gap open penalty
+	@param	weight_gapE	the absolute value of gap extension penalty
+	@param	flag	bitwise FLAG; (from high to low) bit 5: when setted as 1, function ssw_align will return the best alignment
+					beginning position; bit 6: when setted as 1, if (ref_end1 - ref_begin1 < filterd && read_end1 - read_begin1
+					< filterd), (whatever bit 5 is setted) the function will return the best alignment beginning position and
+					cigar; bit 7: when setted as 1, if the best alignment score >= filters, (whatever bit 5 is setted) the function
+  					will return the best alignment beginning position and cigar; bit 8: when setted as 1, (whatever bit 5, 6 or 7 is
+ 					setted) the function will always return the best alignment beginning position and cigar. When flag == 0, only
+					the optimal and sub-optimal scores and the optimal alignment ending position will be returned.
+	@param	filters	score filter: when bit 7 of flag is setted as 1 and bit 8 is setted as 0, filters will be used (Please check the
+ 					decription of the flag parameter for detailed usage.)
+	@param	filterd	distance filter: when bit 6 of flag is setted as 1 and bit 8 is setted as 0, filterd will be used (Please check
+					the decription of the flag parameter for detailed usage.)
+	@param	maskLen	The distance between the optimal and suboptimal alignment ending position >= maskLen. We suggest to use
+					readLen/2, if you don't have special concerns. Note: maskLen has to be >= 15, otherwise this function will NOT
+					return the suboptimal alignment information. Detailed description of maskLen: After locating the optimal
+					alignment ending position, the suboptimal alignment score can be heuristically found by checking the second
+					largest score in the array that contains the maximal score of each column of the SW matrix. In order to avoid
+					picking the scores that belong to the alignments sharing the partial best alignment, SSW C library masks the
+					reference loci nearby (mask length = maskLen) the best alignment ending position and locates the second largest
+					score from the unmasked elements.
+	@return	pointer to the alignment result structure
+	@note	Whatever the parameter flag is setted, this function will at least return the optimal and sub-optimal alignment score,
+			and the optimal alignment ending positions on target and query sequences. If both bit 6 and 7 of the flag are setted
+			while bit 8 is not, the function will return cigar only when both criteria are fulfilled. All returned positions are
+			0-based coordinate.
+        """
+        self.ssw_align = self.ssw.ssw_align
+        self.ssw_align.argtypes = [ct.c_void_p, ct.POINTER(ct.c_int8), ct.c_int32, ct.c_uint8, ct.c_uint8, ct.c_uint8, ct.c_uint16, ct.c_int32, ct.c_int32]
+        self.ssw_align.restype = ct.POINTER(CAlignRes)
+# init align_destroy
+        """
+	@function	Release the memory allocated by function ssw_align.
+	@param	a	pointer to the alignment result structure
+        """
+        self.align_destroy = self.ssw.align_destroy
+        self.align_destroy.argtypes = [ct.POINTER(CAlignRes)]
+        self.align_destroy.restype = None
+
+
+
+def read_matrix(sFile):
+    """
+    read a score matrix for either DNA or protein
+    assume the format of the input score matrix is the same as that of http://www.ncbi.nlm.nih.gov/Class/FieldGuide/BLOSUM62.txt
+
+    """
+    with open(args.sMatrix, 'r') as f:
+        for l in f:
+            if not l.startswith('#'):
+                break
+        lEle = l.strip().split()
+        dEle2Int = {}
+        dInt2Ele = {}
+        for i,ele in enumerate(lEle):
+            dEle2Int[ele] = i
+            dEle2Int[ele.lower()] = i
+            dInt2Ele[i] = ele
+        nEleNum = len(lEle)
+        lScore = []
+        for l in f:
+            lScore.extend([int(x) for x in l.strip().split()[1:]])
+
+        return lEle, dEle2Int, dInt2Ele, lScore
+
+
+if __name__ == '__main__':
+    pass
diff --git a/src/sswjni.c b/src/sswjni.c
new file mode 100644
index 0000000..6d22baa
--- /dev/null
+++ b/src/sswjni.c
@@ -0,0 +1,61 @@
+#include <jni.h>
+#include <stdio.h>
+#include "ssw.h"
+
+jstring s_align_cigar_to_jstring(JNIEnv* env, s_align* align) {
+	jstring jstrBuf = NULL;
+	if (align->cigar != NULL && align->cigarLen > 0) {
+		char* buffer = (char *)malloc(16 * align->cigarLen + 1);
+		buffer[0] = '\0';
+		char* currentBufferPosition = buffer;
+		int i;
+		for (i = 0; i < align->cigarLen; i++) {
+			int charsPrinted = sprintf(currentBufferPosition, "%d%c", cigar_int_to_len(align->cigar[i]), cigar_int_to_op(align->cigar[i]));
+			currentBufferPosition += charsPrinted;
+		}
+		jstrBuf = (*env)->NewStringUTF(env, buffer);
+		free(buffer);
+	}
+	return jstrBuf;
+}
+jobject s_align_to_ssw_Alignment(JNIEnv* env, s_align* align) {
+	if (align == NULL) return NULL;
+	jclass clazz = (*env)->FindClass(env, "ssw/Alignment");
+	jmethodID constructor = (*env)->GetMethodID(env, clazz, "<init>", "(SSIIIIILjava/lang/String;)V");
+	jobject result = (*env)->NewObject(env, clazz, constructor,
+		align->score1,
+		align->score2,
+		align->ref_begin1,
+		align->ref_end1,
+		align->read_begin1,
+		align->read_end1,
+		align->ref_end2,
+		s_align_cigar_to_jstring(env, align));
+	return result;
+}
+JNIEXPORT jobject JNICALL Java_ssw_Aligner_align(JNIEnv* env, jclass cls,
+		jbyteArray read, jbyteArray matrix, jint matrixSize, jint score_size,
+		jbyteArray ref,
+		jint gapOpen,
+		jint gapExtend,
+		jint flag,
+		jshort filters,
+		jint filterd,
+		jint maskLen) {
+	jbyte* readPtr = (*env)->GetByteArrayElements(env, read, NULL);
+	jsize readLen = (*env)->GetArrayLength(env, read);
+	jbyte* matrixPtr = (*env)->GetByteArrayElements(env, matrix, NULL);
+	/*jsize matrixLen = (*env)->GetArrayLength(env, matrix);*/
+	jbyte* refPtr = (*env)->GetByteArrayElements(env, ref, NULL);
+	jsize refLen = (*env)->GetArrayLength(env, ref);
+	s_profile* profile = ssw_init(readPtr, readLen, matrixPtr, matrixSize, (int8_t)score_size);
+	s_align* align = ssw_align(profile, refPtr, refLen, gapOpen, gapExtend, flag, filters, filterd, maskLen);
+	jobject jalignment = s_align_to_ssw_Alignment(env, align);
+	align_destroy(align);
+	init_destroy(profile);
+	(*env)->ReleaseByteArrayElements(env, read, readPtr, JNI_ABORT);
+	(*env)->ReleaseByteArrayElements(env, matrix, matrixPtr, JNI_ABORT);
+	(*env)->ReleaseByteArrayElements(env, ref, refPtr, JNI_ABORT);
+	return jalignment;
+}
+

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/libssw.git



More information about the debian-med-commit mailing list