[med-svn] [cutadapt] 01/02: Imported Upstream version 1.10

Andreas Tille tille at debian.org
Mon Jun 20 13:12:14 UTC 2016


This is an automated email from the git hooks/post-receive script.

tille pushed a commit to branch master
in repository cutadapt.

commit 3967441bfe20edf8ef01b759dde762a87416909d
Author: Andreas Tille <tille at debian.org>
Date:   Mon Jun 20 15:11:12 2016 +0200

    Imported Upstream version 1.10
---
 .gitignore                                  |   16 +
 .travis.yml                                 |   20 +
 CHANGES.rst                                 |  355 +++++++
 CITATION                                    |   16 +
 Dockerfile                                  |   17 +
 LICENSE                                     |   19 +
 MANIFEST.in                                 |   17 +
 README.rst                                  |   41 +
 bin/_preamble.py                            |   21 +
 bin/cutadapt                                |   10 +
 cutadapt/__init__.py                        |   23 +
 cutadapt/_align.pyx                         |  533 +++++++++++
 cutadapt/_qualtrim.pyx                      |   84 ++
 cutadapt/_seqio.pyx                         |  138 +++
 cutadapt/adapters.py                        |  569 +++++++++++
 cutadapt/align.py                           |   35 +
 cutadapt/colorspace.py                      |   83 ++
 cutadapt/compat.py                          |   45 +
 cutadapt/filters.py                         |  256 +++++
 cutadapt/modifiers.py                       |  275 ++++++
 cutadapt/qualtrim.py                        |   70 ++
 cutadapt/report.py                          |  296 ++++++
 cutadapt/scripts/__init__.py                |    0
 cutadapt/scripts/cutadapt.py                |  726 ++++++++++++++
 cutadapt/seqio.py                           |  756 +++++++++++++++
 cutadapt/xopen.py                           |  182 ++++
 doc/Makefile                                |  179 ++++
 doc/_static/adapters.svg                    |  259 +++++
 doc/_static/logo.svg                        |   94 ++
 doc/changes.rst                             |    1 +
 doc/colorspace.rst                          |  128 +++
 doc/conf.py                                 |  270 ++++++
 doc/guide.rst                               | 1373 +++++++++++++++++++++++++++
 doc/ideas.rst                               |  103 ++
 doc/index.rst                               |   25 +
 doc/installation.rst                        |  127 +++
 doc/recipes.rst                             |   83 ++
 setup.py                                    |  148 +++
 tests/.gitignore                            |    3 +
 tests/__init__.py                           |    0
 tests/cut/454.fa                            |  118 +++
 tests/cut/anchored-back.fasta               |    8 +
 tests/cut/anchored.fasta                    |    8 +
 tests/cut/anchored_no_indels.fasta          |   12 +
 tests/cut/anchored_no_indels_wildcard.fasta |   12 +
 tests/cut/anywhere_repeat.fastq             |   28 +
 tests/cut/discard-untrimmed.fastq           |    4 +
 tests/cut/discard.fastq                     |    4 +
 tests/cut/dos.fastq                         |   12 +
 tests/cut/empty.fastq                       |    0
 tests/cut/example.fa                        |   18 +
 tests/cut/examplefront.fa                   |   18 +
 tests/cut/illumina.fastq                    |  400 ++++++++
 tests/cut/illumina.info.txt                 |  100 ++
 tests/cut/illumina5.fastq                   |   20 +
 tests/cut/illumina5.info.txt                |    8 +
 tests/cut/illumina64.fastq                  |   80 ++
 tests/cut/interleaved.fastq                 |   16 +
 tests/cut/issue46.fasta                     |    2 +
 tests/cut/linked.fasta                      |   10 +
 tests/cut/lowercase.fastq                   |   12 +
 tests/cut/lowqual.fastq                     |    8 +
 tests/cut/maxlen.fa                         |   14 +
 tests/cut/maxn0.2.fasta                     |    6 +
 tests/cut/maxn0.4.fasta                     |    8 +
 tests/cut/maxn0.fasta                       |    4 +
 tests/cut/maxn1.fasta                       |    8 +
 tests/cut/maxn2.fasta                       |   10 +
 tests/cut/minlen.fa                         |   16 +
 tests/cut/minlen.noprimer.fa                |   14 +
 tests/cut/nextseq.fastq                     |    8 +
 tests/cut/no-trim.fastq                     |    4 +
 tests/cut/no_indels.fasta                   |   18 +
 tests/cut/overlapa.fa                       |   40 +
 tests/cut/overlapb.fa                       |   38 +
 tests/cut/paired-filterboth.1.fastq         |   16 +
 tests/cut/paired-filterboth.2.fastq         |   16 +
 tests/cut/paired-m27.1.fastq                |   16 +
 tests/cut/paired-m27.2.fastq                |   16 +
 tests/cut/paired-onlyA.1.fastq              |   16 +
 tests/cut/paired-onlyA.2.fastq              |   16 +
 tests/cut/paired-separate.1.fastq           |   16 +
 tests/cut/paired-separate.2.fastq           |   16 +
 tests/cut/paired-too-short.1.fastq          |    4 +
 tests/cut/paired-too-short.2.fastq          |    4 +
 tests/cut/paired-trimmed.1.fastq            |   12 +
 tests/cut/paired-trimmed.2.fastq            |   12 +
 tests/cut/paired-untrimmed.1.fastq          |    4 +
 tests/cut/paired-untrimmed.2.fastq          |    4 +
 tests/cut/paired.1.fastq                    |   12 +
 tests/cut/paired.2.fastq                    |   12 +
 tests/cut/paired.m14.1.fastq                |   12 +
 tests/cut/paired.m14.2.fastq                |   12 +
 tests/cut/pairedq.1.fastq                   |    8 +
 tests/cut/pairedq.2.fastq                   |    8 +
 tests/cut/pairedu.1.fastq                   |   16 +
 tests/cut/pairedu.2.fastq                   |   16 +
 tests/cut/plus.fastq                        |    8 +
 tests/cut/polya.fasta                       |    2 +
 tests/cut/rest.fa                           |   18 +
 tests/cut/restfront.fa                      |   18 +
 tests/cut/s_1_sequence.txt                  |    8 +
 tests/cut/small.fasta                       |    6 +
 tests/cut/small.fastq                       |   12 +
 tests/cut/small.trimmed.fastq               |    8 +
 tests/cut/small.untrimmed.fastq             |    4 +
 tests/cut/solid-no-zerocap.fastq            |  120 +++
 tests/cut/solid.fasta                       |    4 +
 tests/cut/solid.fastq                       |  120 +++
 tests/cut/solid5p-anchored.fasta            |   32 +
 tests/cut/solid5p-anchored.fastq            |   64 ++
 tests/cut/solid5p-anchored.notrim.fasta     |   32 +
 tests/cut/solid5p-anchored.notrim.fastq     |   64 ++
 tests/cut/solid5p.fasta                     |   32 +
 tests/cut/solid5p.fastq                     |   64 ++
 tests/cut/solidbfast.fastq                  |  120 +++
 tests/cut/solidmaq.fastq                    |  120 +++
 tests/cut/solidqual.fastq                   |  120 +++
 tests/cut/sra.fastq                         |   24 +
 tests/cut/stripped.fasta                    |    4 +
 tests/cut/suffix.fastq                      |  120 +++
 tests/cut/trimN3.fasta                      |    2 +
 tests/cut/trimN5.fasta                      |    2 +
 tests/cut/twoadapters.fasta                 |    6 +
 tests/cut/twoadapters.first.fasta           |    2 +
 tests/cut/twoadapters.second.fasta          |    2 +
 tests/cut/twoadapters.unknown.fasta         |    2 +
 tests/cut/unconditional-back.fastq          |   12 +
 tests/cut/unconditional-both.fastq          |   12 +
 tests/cut/unconditional-front.fastq         |   12 +
 tests/cut/wildcard.fa                       |    4 +
 tests/cut/wildcardN.fa                      |    6 +
 tests/cut/wildcard_adapter.fa               |    8 +
 tests/cut/wildcard_adapter_anywhere.fa      |    8 +
 tests/data/454.fa                           |  118 +++
 tests/data/E3M.fasta                        |   59 ++
 tests/data/E3M.qual                         |   59 ++
 tests/data/adapter.fasta                    |    4 +
 tests/data/anchored-back.fasta              |    8 +
 tests/data/anchored.fasta                   |    8 +
 tests/data/anchored_no_indels.fasta         |   12 +
 tests/data/anywhere_repeat.fastq            |   28 +
 tests/data/dos.fastq                        |   12 +
 tests/data/empty.fastq                      |    0
 tests/data/example.fa                       |   18 +
 tests/data/illumina.fastq.gz                |  Bin 0 -> 7161 bytes
 tests/data/illumina5.fastq                  |   20 +
 tests/data/illumina64.fastq                 |   80 ++
 tests/data/interleaved.fastq                |   32 +
 tests/data/issue46.fasta                    |    2 +
 tests/data/lengths.fa                       |   28 +
 tests/data/linked.fasta                     |   10 +
 tests/data/lowqual.fastq                    |    8 +
 tests/data/maxn.fasta                       |   12 +
 tests/data/multiblock.fastq.gz              |  Bin 0 -> 262 bytes
 tests/data/nextseq.fastq                    |    8 +
 tests/data/no_indels.fasta                  |   20 +
 tests/data/overlapa.fa                      |   40 +
 tests/data/overlapb.fa                      |   38 +
 tests/data/paired.1.fastq                   |   16 +
 tests/data/paired.2.fastq                   |   16 +
 tests/data/plus.fastq                       |    8 +
 tests/data/polya.fasta                      |    6 +
 tests/data/prefix-adapter.fasta             |    2 +
 tests/data/rest.fa                          |   18 +
 tests/data/rest.txt                         |    5 +
 tests/data/restfront.txt                    |    6 +
 tests/data/s_1_sequence.txt.gz              |  Bin 0 -> 97 bytes
 tests/data/simple.fasta                     |    7 +
 tests/data/simple.fastq                     |    8 +
 tests/data/small.fastq                      |   12 +
 tests/data/small.fastq.bz2                  |  Bin 0 -> 222 bytes
 tests/data/small.fastq.gz                   |  Bin 0 -> 218 bytes
 tests/data/small.fastq.xz                   |  Bin 0 -> 260 bytes
 tests/data/small.myownextension             |   12 +
 tests/data/solid.csfasta                    |   63 ++
 tests/data/solid.fasta                      |    4 +
 tests/data/solid.fastq                      |  120 +++
 tests/data/solid.qual                       |   63 ++
 tests/data/solid5p.fasta                    |   34 +
 tests/data/solid5p.fastq                    |   64 ++
 tests/data/sra.fastq                        |   24 +
 tests/data/suffix-adapter.fasta             |    2 +
 tests/data/toolong.fa                       |   14 +
 tests/data/tooshort.fa                      |   12 +
 tests/data/tooshort.noprimer.fa             |   14 +
 tests/data/trimN3.fasta                     |    2 +
 tests/data/trimN5.fasta                     |    2 +
 tests/data/twoadapters.fasta                |    6 +
 tests/data/wildcard.fa                      |    4 +
 tests/data/wildcardN.fa                     |    6 +
 tests/data/wildcard_adapter.fa              |    8 +
 tests/data/withplus.fastq                   |    8 +
 tests/testadapters.py                       |  125 +++
 tests/testalign.py                          |  123 +++
 tests/testcolorspace.py                     |  140 +++
 tests/testfilters.py                        |   42 +
 tests/testmodifiers.py                      |   36 +
 tests/testpaired.py                         |  273 ++++++
 tests/testqualtrim.py                       |   14 +
 tests/tests.py                              |  383 ++++++++
 tests/testseqio.py                          |  352 +++++++
 tests/testtrim.py                           |   27 +
 tests/testxopen.py                          |  101 ++
 tests/utils.py                              |   50 +
 tox.ini                                     |    6 +
 206 files changed, 12810 insertions(+)

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..3b6890e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,16 @@
+*.pyc
+MANIFEST
+build/
+dist/
+.coverage
+*~
+.tox
+galaxy/package/
+.pydevproject
+.project
+.settings
+cutadapt/_*.c
+cutadapt/*.so
+doc/_build
+*.pyo
+.idea/
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..4fb5846
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,20 @@
+sudo: false
+language: python
+cache:
+  directories:
+    - $HOME/.cache/pip
+python:
+  - "2.6"
+  - "2.7"
+  - "3.3"
+  - "3.4"
+  - "3.5"
+
+install:
+  - pip install --upgrade pip wheel
+  - pip install Cython
+  - pip install .
+
+script:
+  - nosetests -P tests
+
diff --git a/CHANGES.rst b/CHANGES.rst
new file mode 100644
index 0000000..c3c97b2
--- /dev/null
+++ b/CHANGES.rst
@@ -0,0 +1,355 @@
+=======
+Changes
+=======
+
+v1.10
+-----
+
+* Added a new “linked adapter” type, which can be used to search for a 5' and a
+  3' adapter at the same time. Use ``-a ADAPTER1...ADAPTER2`` to search
+  for a linked adapter. ADAPTER1 is interpreted as an anchored 5' adapter, which
+  is searched for first. Only if ADAPTER1 is found will ADAPTER2 be searched
+  for, which is a regular 3' adapter.
+* Added experimental ``--nextseq-trim`` option for quality trimming of NextSeq
+  data. This is necessary because that machine cannot distinguish between G and
+  reaching the end of the fragment (it encodes G as 'black').
+* Even when trimming FASTQ files, output can now be FASTA (quality values are
+  simply dropped). Use the ``-o``/``-p`` options with a file name that ends in
+  ``.fasta`` or ``.fa`` to enable this.
+* Cutadapt does not bundle pre-compiled C extension modules (``.so`` files)
+  anymore. This affects only users that run cutadapt directly from an unpacked
+  tarball. Install through ``pip`` or ``conda`` instead.
+* Fix issue #167: Option ``--quiet`` was not entirely quiet.
+* Fix issue #199: Be less strict when checking for properly-paired reads.
+* This is the last version of cutadapt to support Python 2.6. Future versions
+  will require at least Python 2.7.
+
+v1.9.1
+------
+
+* Added ``--pair-filter`` option, which :ref:`modifies how filtering criteria
+  apply to paired-end reads <filtering-paired>`
+* Add ``--too-short-paired-output`` and ``--too-long-paired-output`` options.
+* Fix incorrect number of trimmed bases reported if ``--times`` option was used.
+
+v1.9
+----
+
+* Indels in the alignment can now be disabled for all adapter types (use
+  ``--no-indels``).
+* Quality values are now printed in the info file (``--info-file``)
+  when trimming FASTQ files. Fixes issue #144.
+* Options ``--prefix`` and ``--suffix``, which modify read names, now accept the
+  placeholder ``{name}`` and will replace it with the name of the found adapter.
+  Fixes issue #104.
+* Interleaved FASTQ files: With the ``--interleaved`` switch, paired-end reads
+  will be read from and written to interleaved FASTQ files. Fixes issue #113.
+* Anchored 5' adapters can now be specified by writing ``-a SEQUENCE...`` (note
+  the three dots).
+* Fix ``--discard-untrimmed`` and ``--discard-trimmed`` not working as expected
+  in paired-end mode (issue #146).
+* The minimum overlap is now automatically reduced to the adapter length if it
+  is too large. Fixes part of issue #153.
+* Thanks to Wolfgang Gerlach, there is now a Dockerfile.
+* The new ``--debug`` switch makes cutadapt print out the alignment matrix.
+
+v1.8.3
+------
+
+* Fix issue #95: Untrimmed reads were not listed in the info file.
+* Fix issue #138: pip install cutadapt did not work with new setuptools versions.
+* Fix issue #137: Avoid a hang when writing to two or more gzip-compressed
+  output files in Python 2.6.
+
+v1.8.1
+------
+
+* Fix #110: Counts for 'too short' and 'too long' reads were swapped in statistics.
+* Fix #115: Make ``--trim-n`` work also on second read for paired-end data.
+
+v1.8
+----
+
+* Support single-pass paired-end trimming with the new ``-A``/``-G``/``-B``/``-U``
+  parameters. These work just like their -a/-g/-b/-u counterparts, but they
+  specify sequences that are removed from the *second read* in a pair.
+
+  Also, if you start using one of those options, the read modification options
+  such as ``-q`` (quality trimming) are applied to *both* reads. For backwards
+  compatibility, read modifications are applied to the first read only if
+  neither of ``-A``/``-G``/``-B``/``-U`` is used. See `the
+  documentation <http://cutadapt.readthedocs.org/en/latest/guide.html#paired-end>`_
+  for details.
+
+  This feature has not been extensively tested, so please give feedback if
+  something does not work.
+* The report output has been re-worked in order to accommodate the new paired-end
+  trimming mode. This also changes the way the report looks in single-end
+  mode. It is hopefully now more accessible.
+* Chris Mitchell contributed a patch adding two new options: ``--trim-n``
+  removes any ``N`` bases from the read ends, and the ``--max-n`` option can be
+  used to filter out reads with too many ``N``.
+* Support notation for repeated bases in the adapter sequence: Write ``A{10}``
+  instead of ``AAAAAAAAAA``. Useful for poly-A trimming: Use ``-a A{100}`` to
+  get the longest possible tail.
+* Quality trimming at the 5' end of reads is now supported. Use ``-q 15,10`` to
+  trim the 5' end with a cutoff of 15 and the 3' end with a cutoff of 10.
+* Fix incorrectly reported statistics (> 100% trimmed bases) when ``--times``
+  set to a value greater than one.
+* Support .xz-compressed files (if running in Python 3.3 or later).
+* Started to use the GitHub issue tracker instead of Google Code. All old issues
+  have been moved.
+
+v1.7
+----
+* IUPAC characters are now supported. For example, use ``-a YACGT`` for an
+  adapter that matches both ``CACGT`` and ``TACGT`` with zero errors. Disable
+  with ``-N``. By default, IUPAC characters in the read are not interpreted in
+  order to avoid matches in reads that consist of many (low-quality) ``N``
+  bases. Use ``--match-read-wildcards`` to enable them also in the read.
+* Support for demultiplexing was added. This means that reads can be written to
+  different files depending on which adapter was found. See `the section in the
+  documentation <http://cutadapt.readthedocs.org/en/latest/guide.html#demultiplexing>`_
+  for how to use it. This is currently only supported for single-end reads.
+* Add support for anchored 3' adapters. Append ``$`` to the adapter sequence to
+  force the adapter to appear in the end of the read (as a suffix). Closes
+  issue #81.
+* Option ``--cut`` (``-u``) can now be specified twice, once for each end of the
+  read. Thanks to Rasmus Borup Hansen for the patch!
+* Options ``--minimum-length``/``--maximum-length`` (``-m``/``-M``) can be used
+  standalone. That is, cutadapt can be used to filter reads by length without
+  trimming adapters.
+* Fix bug: Adapters read from a FASTA file can now be anchored.
+
+v1.6
+----
+* Fix bug: Ensure ``--format=...`` can be used even with paired-end input.
+* Fix bug: Sometimes output files would be incomplete because they were not
+  closed correctly.
+* Alignment algorithm is a tiny bit faster.
+* Extensive work on the documentation. It's now available at
+  https://cutadapt.readthedocs.org/ .
+* For 3' adapters, statistics about the bases preceding the trimmed adapter
+  are collected and printed. If one of the bases is overrepresented, a warning
+  is shown since this points to an incomplete adapter sequence. This happens,
+  for example, when a TruSeq adapter is used but the A overhang is not taken
+  into account when running cutadapt.
+* Due to code cleanup, there is a change in behavior: If you use
+  ``--discard-trimmed`` or ``--discard-untrimmed`` in combination with
+  ``--too-short-output`` or ``--too-long-output``, then cutadapt now writes also
+  the discarded reads to the output files given by the ``--too-short`` or
+  ``--too-long`` options. If anyone complains, I will consider reverting this.
+* Galaxy support files are now in `a separate
+  repository <https://bitbucket.org/lance_parsons/cutadapt_galaxy_wrapper>`_.
+
+v1.5
+----
+* Adapter sequences can now be read from a FASTA file. For example, write
+  ``-a file:adapters.fasta`` to read 3' adapters from ``adapters.fasta``. This works
+  also for ``-b`` and ``-g``.
+* Add the option ``--mask-adapter``, which can be used to not remove adapters,
+  but to instead mask them with ``N`` characters. Thanks to Vittorio Zamboni
+  for contributing this feature!
+* U characters in the adapter sequence are automatically converted to T.
+* Do not run Cython at installation time unless the --cython option is provided.
+* Add the option -u/--cut, which can be used to unconditionally remove a number
+  of bases from the beginning or end of each read.
+* Make ``--zero-cap`` the default for colorspace reads.
+* When the new option ``--quiet`` is used, no report is printed after all reads
+  have been processed.
+* When processing paired-end reads, cutadapt now checks whether the reads are
+  properly paired.
+* To properly handle paired-end reads, an option --untrimmed-paired-output was
+  added.
+
+v1.4
+----
+* This release of cutadapt reduces the overhead of reading and writing files.
+  On my test data set, a typical run of cutadapt (with a single adapter) takes
+  40% less time due to the following two changes.
+* Reading and writing of FASTQ files is faster (thanks to Cython).
+* Reading and writing of gzipped files is faster (up to 2x) on systems
+  where the ``gzip`` program is available.
+* The quality trimming function is four times faster (also due to Cython).
+* Fix the statistics output for 3' colorspace adapters: The reported lengths were one
+  too short. Thanks to Frank Wessely for reporting this.
+* Support the ``--no-indels`` option. This disallows insertions and deletions while
+  aligning the adapter. Currently, the option is only available for anchored 5' adapters.
+  This fixes issue 69.
+* As a side effect of implementing the --no-indels option: For colorspace, the
+  length of a read (for ``--minimum-length`` and ``--maximum-length``) is now computed after
+  primer base removal (when ``--trim-primer`` is specified).
+* Added one column to the info file that contains the name of the found adapter.
+* Add an explanation about colorspace ambiguity to the README
+
+v1.3
+----
+* Preliminary paired-end support with the ``--paired-output`` option (contributed by
+  James Casbon). See the README section on how to use it.
+* Improved statistics.
+* Fix incorrectly reported amount of quality-trimmed Mbp (issue 57, fix by Chris Penkett)
+* Add the ``--too-long-output`` option.
+* Add the ``--no-trim`` option, contributed by Dave Lawrence.
+* Port handwritten C alignment module to Cython.
+* Fix the ``--rest-file`` option (issue 56)
+* Slightly speed up alignment of 5' adapters.
+* Support bzip2-compressed files.
+
+v1.2
+----
+* At least 25% faster processing of .csfasta/.qual files due to faster parser.
+* Between 10% and 30% faster writing of gzip-compressed output files.
+* Support 5' adapters in colorspace, even when no primer trimming is requested.
+* Add the ``--info-file`` option, which has a line for each found adapter.
+* Named adapters are possible. Usage: ``-a My_Adapter=ACCGTA`` assigns the name "My_Adapter".
+* Improve alignment algorithm for better poly-A trimming when there are sequencing errors.
+  Previously, not the longest possible poly-A tail would be trimmed.
+* James Casbon contributed the ``--discard-untrimmed`` option.
+
+v1.1
+----
+* Allow to "anchor" 5' adapters (``-g``), forcing them to be a prefix of the read.
+  To use this, add the special character ``^`` to the beginning of the adapter sequence.
+* Add the "-N" option, which allows 'N' characters within adapters to match literally.
+* Speedup of approx. 25% when reading from .gz files and using Python 2.7.
+* Allow to only trim qualities when no adapter is given on the command-line.
+* Add a patch by James Casbon: include read names (ids) in rest file
+* Use nosetest for testing. To run, install nose and run "nosetests".
+* When using cutadapt without installing it, you now need to run ``bin/cutadapt`` due to
+  a new directory layout.
+* Allow to give a colorspace adapter in basespace (gets automatically converted).
+* Allow to search for 5' adapters (those specified with ``-g``) in colorspace.
+* Speed up the alignment by a factor of at least 3 by using Ukkonen's algorithm.
+  The total runtime decreases by about 30% in the tested cases.
+* allow to deal with colorspace FASTQ files from the SRA that contain a fake
+  additional quality in the beginning (use ``--format sra-fastq``)
+
+v1.0
+----
+* ASCII-encoded quality values were assumed to be encoded as ascii(quality+33).
+  With the new parameter ``--quality-base``, this can be changed to ascii(quality+64),
+  as used in some versions of the Illumina pipeline. (Fixes issue 7.)
+* Allow to specify that adapters were ligated to the 5' end of reads. This change
+  is based on a patch contributed by James Casbon.
+* Due to cutadapt being published in EMBnet.journal, I found it appropriate
+  to call this release version 1.0. Please see
+  http://journal.embnet.org/index.php/embnetjournal/article/view/200 for the
+  article and I would be glad if you cite it.
+* Add Galaxy support, contributed by Lance Parsons.
+* Patch by James Casbon: Allow N wildcards in read or adapter or both.
+  Wildcard matching of 'N's in the adapter is always done. If 'N's within reads
+  should also match without counting as error, this needs to be explicitly
+  requested via ``--match-read-wildcards``.
+
+v0.9.5
+------
+* Fix issue 20: Make the report go to standard output when ``-o``/``--output`` is
+  specified.
+* Recognize ``.fq`` as an extension for FASTQ files
+* many more unit tests
+* The alignment algorithm has changed. It will now find some adapters that
+  previously were missed. Note that this will produce different output than
+  older cutadapt versions!
+
+  Before this change, finding an adapter would work as follows:
+
+  - Find an alignment between adapter and read -- longer alignments are
+    better.
+  - If the number of errors in the alignment (divided by length) is above the
+    maximum error rate, report the adapter as not being found.
+
+  Sometimes, the long alignment that is found had too many errors, but a
+  shorter alignment would not. The adapter was then incorrectly seen as "not
+  found". The new alignment algorithm checks the error rate while aligning and only
+  reports alignments that do not have too many errors.
+
+v0.9.4
+------
+* now compatible with Python 3
+* Add the ``--zero-cap`` option, which changes negative quality values to zero.
+  This is a workaround to avoid segmentation faults in BWA. The option is now
+  enabled by default when ``--bwa``/``--maq`` is used.
+* Lots of unit tests added. Run them with ``cd tests && ./tests.sh``.
+* Fix issue 16: ``--discard-trimmed`` did not work.
+* Allow to override auto-detection of input file format with the new ``-f``/``--format``
+  parameter. This mostly fixes issue 12.
+* Don't break when input file is empty.
+
+v0.9.2
+------
+* Install a single ``cutadapt`` Python package instead of multiple Python
+  modules. This avoids cluttering the global namespace and should lead to fewer
+  problems with other Python modules. Thanks to Steve Lianoglou for
+  pointing this out to me!
+* ignore case (ACGT vs acgt) when comparing the adapter with the read sequence
+* .FASTA/.QUAL files (not necessarily colorspace) can now be read (some
+  454 software uses this format)
+* Move some functions into their own modules
+* lots of refactoring: replace the fasta module with a much nicer seqio module.
+* allow to input FASTA/FASTQ on standard input (also FASTA/FASTQ is
+  autodetected)
+
+v0.9
+----
+* add ``--too-short-output`` and ``--untrimmed-output``, based on patch by Paul Ryvkin (thanks!)
+* add ``--maximum-length`` parameter: discard reads longer than a specified length
+* group options by category in ``--help`` output
+* add ``--length-tag`` option. allows to fix read length in FASTA/Q comment lines
+  (e.g., ``length=123`` becomes ``length=58`` after trimming) (requested by Paul Ryvkin)
+* add ``-q``/``--quality-cutoff`` option for trimming low-quality ends (uses the same algorithm
+  as BWA)
+* some refactoring
+* the filename ``-`` is now interpreted as standard input or standard output
+
+v0.8
+----
+* Change default behavior of searching for an adapter: The adapter is now assumed to
+  be an adapter that has been ligated to the 3' end. This should be the correct behavior
+  for at least the SOLiD small RNA protocol (SREK) and also for the Illumina protocol.
+  To get the old behavior, which uses a heuristic to determine whether the adapter was
+  ligated to the 5' or 3' end and then trimmed the read accordingly, use the new
+  ``-b`` (``--anywhere``) option.
+* Clear up how the statistics after processing all reads are printed.
+* Fix incorrect statistics. Adapters starting at pos. 0 were correctly trimmed,
+  but not counted.
+* Modify scoring scheme: Improves trimming (some reads that should have been
+  trimmed were not). Increases no. of trimmed reads in one of our SOLiD data sets
+  from 36.5 to 37.6%.
+* Speed improvements (20% less runtime on my test data set).
+
+v0.7
+----
+* Useful exit codes
+* Better error reporting when malformed files are encountered
+* Add ``--minimum-length`` parameter for discarding reads that are shorter than
+  a specified length after trimming.
+* Generalize the alignment function a bit. This is preparation for
+  supporting adapters that are specific to either the 5' or 3' end.
+* pure Python fallback for alignment function for when the C module cannot
+  be used.
+
+v0.6
+----
+* Support gzipped input and output.
+* Print timing information in statistics.
+
+v0.5
+----
+* add ``--discard`` option which makes cutadapt discard reads in which an adapter occurs
+
+v0.4
+----
+* (more) correctly deal with multiple adapters: If a long adapter matches with lots of
+  errors, then this could lead to a shorter adapter matching with few errors getting ignored.
+
+v0.3
+----
+* fix huge memory usage (entire input file was unintentionally read into memory)
+
+v0.2
+----
+* allow FASTQ input
+
+v0.1
+----
+* initial release
diff --git a/CITATION b/CITATION
new file mode 100644
index 0000000..a1e62e2
--- /dev/null
+++ b/CITATION
@@ -0,0 +1,16 @@
+Marcel Martin. Cutadapt removes adapter sequences from high-throughput sequencing reads.
+EMBnet.journal, 17(1):10-12, May 2011.
+DOI: http://dx.doi.org/10.14806/ej.17.1.200
+
+@ARTICLE{Martin2011Cutadapt,
+  author = {Marcel Martin},
+  title = {Cutadapt removes adapter sequences from high-throughput sequencing reads},
+  journal = {EMBnet.journal},
+  year = 2011,
+  month = may,
+  volume = 17,
+  pages = {10--12},
+  number = 1,
+  doi = {http://dx.doi.org/10.14806/ej.17.1.200},
+  url = {http://journal.embnet.org/index.php/embnetjournal/article/view/200}
+}
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..e0d8145
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,17 @@
+FROM debian:jessie
+
+RUN apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+  python2.7-dev \
+  cython
+
+ADD . /cutadapt/
+
+RUN cd /cutadapt/ && python setup.py install && python setup.py build_ext -i
+
+ENTRYPOINT ["/cutadapt/bin/cutadapt"]
+CMD ["--help"]
+
+# git clone https://github.com/marcelm/cutadapt.git
+# cd cutadapt
+# docker build -t marcelm/cutadapt:latest .
+
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..df04e21
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,19 @@
+Copyright (c) 2010-2016 Marcel Martin <marcel.martin at scilifelab.se>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..7d8f3b2
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,17 @@
+# documentation
+include README.rst
+include CHANGES.rst
+include CITATION
+include LICENSE
+include doc/*.rst
+include doc/conf.py
+include doc/Makefile
+include cutadapt/*.pyx
+include cutadapt/_align.c
+include cutadapt/_qualtrim.c
+include cutadapt/_seqio.c
+include bin/_preamble.py
+include tests/test*.py
+include tests/utils.py
+graft tests/data
+graft tests/cut
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..fcae283
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,41 @@
+.. image:: https://travis-ci.org/marcelm/cutadapt.svg?branch=master
+    :target: https://travis-ci.org/marcelm/cutadapt
+
+.. image:: https://img.shields.io/pypi/v/cutadapt.svg?branch=master
+    :target: https://pypi.python.org/pypi/cutadapt
+
+========
+cutadapt
+========
+
+Cutadapt finds and removes adapter sequences, primers, poly-A tails and other
+types of unwanted sequence from your high-throughput sequencing reads.
+
+Cleaning your data in this way is often required: Reads from small-RNA
+sequencing contain the 3’ sequencing adapter because the read is longer than
+the molecule that is sequenced. Amplicon reads start with a primer sequence.
+Poly-A tails are useful for pulling out RNA from your sample, but often you
+don’t want them to be in your reads.
+
+Cutadapt helps with these trimming tasks by finding the adapter or primer
+sequences in an error-tolerant way. It can also modify and filter reads in
+various ways. Adapter sequences can contain IUPAC wildcard characters. Also,
+paired-end reads and even colorspace data are supported. If you want, you can
+also just demultiplex your input data, without removing adapter sequences at all.
+
+Cutadapt comes with an extensive suite of automated tests and is available under
+the terms of the MIT license.
+
+If you use cutadapt, please cite
+`DOI:10.14806/ej.17.1.200 <http://dx.doi.org/10.14806/ej.17.1.200>`_ .
+
+
+Links
+-----
+
+* `Documentation <https://cutadapt.readthedocs.org/>`_
+* `Source code <https://github.com/marcelm/cutadapt/>`_
+* `Report an issue <https://github.com/marcelm/cutadapt/issues>`_
+* `Project page on PyPI (Python package index) <https://pypi.python.org/pypi/cutadapt/>`_
+* `Follow @marcelm_ on Twitter <https://twitter.com/marcelm_>`_
+* `Wrapper for the Galaxy platform <https://bitbucket.org/lance_parsons/cutadapt_galaxy_wrapper>`_
diff --git a/bin/_preamble.py b/bin/_preamble.py
new file mode 100644
index 0000000..55f392a
--- /dev/null
+++ b/bin/_preamble.py
@@ -0,0 +1,21 @@
+# Copyright (c) Twisted Matrix Laboratories.
+#
+# Copied from Twisted (http://twistedmatrix.com/), see
+# http://twistedmatrix.com/trac/browser/trunk/LICENSE for the license.
+#
+# This makes sure that users don't have to set up their environment
+# specially in order to run these programs from bin/.
+
+# This helper is shared by many different actual scripts.  It is not intended to
+# be packaged or installed, it is only a developer convenience.  By the time
+# the package is actually installed somewhere, the environment should already be set
+# up properly without the help of this tool.
+
+import sys, os
+
+path = os.path.abspath(sys.argv[0])
+while os.path.dirname(path) != path:
+    if os.path.exists(os.path.join(path, 'cutadapt', '__init__.py')):
+        sys.path.insert(0, path)
+        break
+    path = os.path.dirname(path)
diff --git a/bin/cutadapt b/bin/cutadapt
new file mode 100755
index 0000000..02c4c8d
--- /dev/null
+++ b/bin/cutadapt
@@ -0,0 +1,10 @@
+#!/usr/bin/env python
+import sys
+
+try:
+	import _preamble
+except ImportError:
+	pass
+
+from cutadapt.scripts import cutadapt
+cutadapt.main()
diff --git a/cutadapt/__init__.py b/cutadapt/__init__.py
new file mode 100644
index 0000000..e3422c7
--- /dev/null
+++ b/cutadapt/__init__.py
@@ -0,0 +1,23 @@
+# coding: utf-8
+from __future__ import print_function, division, absolute_import
+import sys
+
+__version__ = '1.10'
+
+def check_importability():  # pragma: no cover
+	try:
+		import cutadapt._align
+	except ImportError as e:
+		if 'undefined symbol' in str(e):
+			print("""
+ERROR: A required extension module could not be imported because it is
+incompatible with your system. A quick fix is to recompile the extension
+modules with the following command:
+
+    {0} setup.py build_ext -i
+
+See the documentation for alternative ways of installing the program.
+
+The original error message follows.
+""".format(sys.executable))
+		raise
diff --git a/cutadapt/_align.pyx b/cutadapt/_align.pyx
new file mode 100644
index 0000000..57bc0f8
--- /dev/null
+++ b/cutadapt/_align.pyx
@@ -0,0 +1,533 @@
+# cython: profile=False, emit_code_comments=False
+from cpython.mem cimport PyMem_Malloc, PyMem_Free, PyMem_Realloc
+
+DEF START_WITHIN_SEQ1 = 1
+DEF START_WITHIN_SEQ2 = 2
+DEF STOP_WITHIN_SEQ1 = 4
+DEF STOP_WITHIN_SEQ2 = 8
+DEF SEMIGLOBAL = 15
+
+# structure for a DP matrix entry
+ctypedef struct _Entry:
+	int cost
+	int matches  # no. of matches in this alignment
+	int origin   # where the alignment originated: negative for positions within seq1, positive for pos. within seq2
+
+
+ctypedef struct _Match:
+	int origin
+	int cost
+	int matches
+	int ref_stop
+	int query_stop
+
+
+def _acgt_table():
+	"""
+	Return a translation table that maps A, C, G, T characters to the lower
+	four bits of a byte. Other characters (including possibly IUPAC characters)
+	are mapped to zero.
+
+	Lowercase versions are also translated, and U is treated the same as T.
+	"""
+	d = dict(A=1, C=2, G=4, T=8, U=8)
+	t = bytearray(b'\0') * 256
+	for c, v in d.items():
+		t[ord(c)] = v
+		t[ord(c.lower())] = v
+	return bytes(t)
+
+
+def _iupac_table():
+	"""
+	Return a translation table for IUPAC characters.
+
+	The table maps ASCII-encoded IUPAC nucleotide characters to bytes in which
+	the four least significant bits are used to represent one nucleotide each.
+
+	Whether two characters x and y match can then be checked with the
+	expression "x & y != 0".
+	"""
+	A = 1
+	C = 2
+	G = 4
+	T = 8
+	d = dict(
+		X=0,
+		A=A,
+		C=C,
+		G=G,
+		T=T,
+		U=T,
+		R=A|G,
+		Y=C|T,
+		S=G|C,
+		W=A|T,
+		K=G|T,
+		M=A|C,
+		B=C|G|T,
+		D=A|G|T,
+		H=A|C|T,
+		V=A|C|G,
+		N=A|C|G|T
+	)
+	t = bytearray(b'\0') * 256
+	for c, v in d.items():
+		t[ord(c)] = v
+		t[ord(c.lower())] = v
+	return bytes(t)
+
+
+cdef bytes ACGT_TABLE = _acgt_table()
+cdef bytes IUPAC_TABLE = _iupac_table()
+
+
+class DPMatrix:
+	"""
+	Representation of the dynamic-programming matrix.
+
+	This is used only when debugging is enabled in the Aligner class since the
+	matrix is normally not stored in full.
+
+	Entries in the matrix may be None, in which case that value was not
+	computed.
+	"""
+	def __init__(self, reference, query):
+		m = len(reference)
+		n = len(query)
+		self._rows = [ [None] * (n+1) for _ in range(m + 1) ]
+		self.reference = reference
+		self.query = query
+
+	def set_entry(self, int i, int j, cost):
+		"""
+		Set an entry in the dynamic programming matrix.
+		"""
+		self._rows[i][j] = cost
+
+	def __str__(self):
+		"""
+		Return a representation of the matrix as a string.
+		"""
+		rows = ['     ' + ' '.join(c.rjust(2) for c in self.query)]
+		for c, row in zip(' ' + self.reference, self._rows):
+			r = c + ' ' + ' '.join('  ' if v is None else '{0:2d}'.format(v) for v in row)
+			rows.append(r)
+		return '\n'.join(rows)
+
+
+cdef class Aligner:
+	"""
+	TODO documentation still uses s1 (reference) and s2 (query).
+
+	Locate one string within another by computing an optimal semiglobal
+	alignment between string1 and string2.
+
+	The alignment uses unit costs, which means that mismatches, insertions and deletions are
+	counted as one error.
+
+	flags is a bitwise 'or' of the allowed flags.
+	To allow skipping of a prefix of string1 at no cost, set the
+	START_WITHIN_SEQ1 flag.
+	To allow skipping of a prefix of string2 at no cost, set the
+	START_WITHIN_SEQ2 flag.
+	If both are set, a prefix of string1 or of string2 is skipped,
+	never both.
+	Similarly, set STOP_WITHIN_SEQ1 and STOP_WITHIN_SEQ2 to
+	allow skipping of suffixes of string1 or string2. Again, when both
+	flags are set, never suffixes in both strings are skipped.
+	If all flags are set, this results in standard semiglobal alignment.
+
+	The skipped parts are described with two intervals (start1, stop1),
+	(start2, stop2).
+
+	For example, an optimal semiglobal alignment of SISSI and MISSISSIPPI looks like this:
+
+	---SISSI---
+	MISSISSIPPI
+
+	start1, stop1 = 0, 5
+	start2, stop2 = 3, 8
+	(with zero errors)
+
+	The aligned parts are string1[start1:stop1] and string2[start2:stop2].
+
+	The error rate is: errors / length where length is (stop1 - start1).
+
+	An optimal alignment fulfills all of these criteria:
+
+	- its error_rate is at most max_error_rate
+	- Among those alignments with error_rate <= max_error_rate, the alignment contains
+	  a maximal number of matches (there is no alignment with more matches).
+	- If there are multiple alignments with the same no. of matches, then one that
+	  has minimal no. of errors is chosen.
+	- If there are still multiple candidates, choose the alignment that starts at the
+	  leftmost position within the read.
+
+	The alignment itself is not returned, only the tuple
+	(start1, stop1, start2, stop2, matches, errors), where the first four fields have the
+	meaning as described, matches is the number of matches and errors is the number of
+	errors in the alignment.
+
+	It is always the case that at least one of start1 and start2 is zero.
+
+	IUPAC wildcard characters can be allowed in the reference and the query
+	by setting the appropriate flags.
+
+	If neither flag is set, the full ASCII alphabet is used for comparison.
+	If any of the flags is set, all non-IUPAC characters in the sequences
+	compare as 'not equal'.
+	"""
+	cdef int m
+	cdef _Entry* column  # one column of the DP matrix
+	cdef double max_error_rate
+	cdef int flags
+	cdef int _insertion_cost
+	cdef int _deletion_cost
+	cdef int _min_overlap
+	cdef bint wildcard_ref
+	cdef bint wildcard_query
+	cdef bint debug
+	cdef object _dpmatrix
+	cdef bytes _reference  # TODO rename to translated_reference or so
+	cdef str str_reference
+
+	def __cinit__(self, str reference, double max_error_rate, int flags=SEMIGLOBAL, bint wildcard_ref=False, bint wildcard_query=False):
+		self.max_error_rate = max_error_rate
+		self.flags = flags
+		self.wildcard_ref = wildcard_ref
+		self.wildcard_query = wildcard_query
+		self.str_reference = reference
+		self.reference = reference
+		self._min_overlap = 1
+		self.debug = False
+		self._dpmatrix = None
+		self._insertion_cost = 1
+		self._deletion_cost = 1
+
+	property min_overlap:
+		def __get__(self):
+			return self._min_overlap
+
+		def __set__(self, int value):
+			if value < 1:
+				raise ValueError('Minimum overlap must be at least 1')
+			self._min_overlap = value
+
+	property indel_cost:
+		"""
+		Write-only cost charged per insertion or deletion (at least 1). Matches
+		cost 0 and mismatches cost 1; only the insertion/deletion cost can change.
+		"""
+		def __set__(self, value):
+			if value < 1:
+				raise ValueError('Insertion/deletion cost must be at least 1')
+			self._insertion_cost = value
+			self._deletion_cost = value
+
+	property reference:
+		def __get__(self):
+			return self._reference
+
+		def __set__(self, str reference):
+			mem = <_Entry*> PyMem_Realloc(self.column, (len(reference) + 1) * sizeof(_Entry))
+			if not mem:
+				raise MemoryError()
+			self.column = mem
+			self._reference = reference.encode('ascii')
+			self.m = len(reference)
+			if self.wildcard_ref:
+				self._reference = self._reference.translate(IUPAC_TABLE)
+			elif self.wildcard_query:
+				self._reference = self._reference.translate(ACGT_TABLE)
+			self.str_reference = reference
+
+	property dpmatrix:
+		"""
+		The dynamic programming matrix as a DPMatrix object. This attribute is
+		usually None, unless debugging has been enabled with enable_debug().
+		"""
+		def __get__(self):
+			return self._dpmatrix
+
+	def enable_debug(self):
+		"""
+		Store the dynamic programming matrix while running the locate() method
+		and make it available in the .dpmatrix attribute.
+		"""
+		self.debug = True
+
+	def locate(self, str query):
+		"""
+		locate(query) -> (refstart, refstop, querystart, querystop, matches, errors)
+
+		Find the query within the reference associated with this aligner. The
+		intervals (querystart, querystop) and (refstart, refstop) give the
+		location of the match.
+
+		That is, the substrings query[querystart:querystop] and
+		self.reference[refstart:refstop] were found to align best to each other,
+		with the given number of matches and the given number of errors.
+
+		The alignment itself is not returned.
+		"""
+		cdef char* s1 = self._reference
+		cdef bytes query_bytes = query.encode('ascii')
+		cdef char* s2 = query_bytes
+		cdef int m = self.m
+		cdef int n = len(query)
+		cdef _Entry* column = self.column
+		cdef double max_error_rate = self.max_error_rate
+		cdef bint start_in_ref = self.flags & START_WITHIN_SEQ1
+		cdef bint start_in_query = self.flags & START_WITHIN_SEQ2
+		cdef bint stop_in_ref = self.flags & STOP_WITHIN_SEQ1
+		cdef bint stop_in_query = self.flags & STOP_WITHIN_SEQ2
+
+		if self.wildcard_query:
+			query_bytes = query_bytes.translate(IUPAC_TABLE)
+			s2 = query_bytes
+		elif self.wildcard_ref:
+			query_bytes = query_bytes.translate(ACGT_TABLE)
+			s2 = query_bytes
+		cdef bint compare_ascii = not (self.wildcard_query or self.wildcard_ref)
+		"""
+		DP Matrix:
+		           query (j)
+		         ----------> n
+		        |
+		ref (i) |
+		        |
+		        V
+		       m
+		"""
+		cdef int i, j
+
+		# maximum no. of errors
+		cdef int k = <int> (max_error_rate * m)
+
+		# Determine largest and smallest column we need to compute
+		cdef int max_n = n
+		cdef int min_n = 0
+		if not start_in_query:
+			# costs can only get worse after column m
+			max_n = min(n, m + k)
+		if not stop_in_query:
+			min_n = max(0, n - m - k)
+
+		# Fill column min_n.
+		#
+		# Four cases:
+		# not startin1, not startin2: c(i,j) = max(i,j); origin(i, j) = 0
+		#     startin1, not startin2: c(i,j) = j       ; origin(i, j) = min(0, j - i)
+		# not startin1,     startin2: c(i,j) = i       ; origin(i, j) = max(0, j - i)
+		#     startin1,     startin2: c(i,j) = min(i,j); origin(i, j) = j - i
+
+		# TODO (later)
+		# fill out columns only until 'last'
+		if not start_in_ref and not start_in_query:
+			for i in range(m + 1):
+				column[i].matches = 0
+				column[i].cost = max(i, min_n) * self._insertion_cost
+				column[i].origin = 0
+		elif start_in_ref and not start_in_query:
+			for i in range(m + 1):
+				column[i].matches = 0
+				column[i].cost = min_n * self._insertion_cost
+				column[i].origin = min(0, min_n - i)
+		elif not start_in_ref and start_in_query:
+			for i in range(m + 1):
+				column[i].matches = 0
+				column[i].cost = i * self._insertion_cost
+				column[i].origin = max(0, min_n - i)
+		else:
+			for i in range(m + 1):
+				column[i].matches = 0
+				column[i].cost = min(i, min_n) * self._insertion_cost
+				column[i].origin = min_n - i
+
+		if self.debug:
+			self._dpmatrix = DPMatrix(self.str_reference, query)
+			for i in range(m + 1):
+				self._dpmatrix.set_entry(i, min_n, column[i].cost)
+		cdef _Match best
+		best.ref_stop = m
+		best.query_stop = n
+		best.cost = m + n
+		best.origin = 0
+		best.matches = 0
+
+		# Ukkonen's trick: index of the last cell that is less than k.
+		cdef int last = min(m, k + 1)
+		if start_in_ref:
+			last = m
+
+		cdef int cost_diag
+		cdef int cost_deletion
+		cdef int cost_insertion
+		cdef int origin, cost, matches
+		cdef int length
+		cdef bint characters_equal
+		cdef _Entry tmp_entry
+
+		with nogil:
+			# iterate over columns
+			for j in range(min_n + 1, max_n + 1):
+				# remember first entry
+				tmp_entry = column[0]
+
+				# fill in first entry in this column
+				if start_in_query:
+					column[0].origin = j
+				else:
+					column[0].cost = j * self._insertion_cost
+				for i in range(1, last + 1):
+					if compare_ascii:
+						characters_equal = (s1[i-1] == s2[j-1])
+					else:
+						characters_equal = (s1[i-1] & s2[j-1]) != 0
+					if characters_equal:
+						# Characters match: This cannot be an indel.
+						cost = tmp_entry.cost
+						origin = tmp_entry.origin
+						matches = tmp_entry.matches + 1
+					else:
+						# Characters do not match.
+						cost_diag = tmp_entry.cost + 1
+						cost_deletion = column[i].cost + self._deletion_cost
+						cost_insertion = column[i-1].cost + self._insertion_cost
+
+						if cost_diag <= cost_deletion and cost_diag <= cost_insertion:
+							# MISMATCH
+							cost = cost_diag
+							origin = tmp_entry.origin
+							matches = tmp_entry.matches
+						elif cost_insertion <= cost_deletion:
+							# INSERTION
+							cost = cost_insertion
+							origin = column[i-1].origin
+							matches = column[i-1].matches
+						else:
+							# DELETION
+							cost = cost_deletion
+							origin = column[i].origin
+							matches = column[i].matches
+
+					# remember current cell for next iteration
+					tmp_entry = column[i]
+
+					column[i].cost = cost
+					column[i].origin = origin
+					column[i].matches = matches
+				if self.debug:
+					with gil:
+						for i in range(last + 1):
+							self._dpmatrix.set_entry(i, j, column[i].cost)
+				while last >= 0 and column[last].cost > k:
+					last -= 1
+				# last can be -1 here, but will be incremented next.
+				# TODO if last is -1, can we stop searching?
+				if last < m:
+					last += 1
+				elif stop_in_query:
+					# Found a match. If requested, find best match in last row.
+					# length of the aligned part of the reference
+					length = m + min(column[m].origin, 0)
+					cost = column[m].cost
+					matches = column[m].matches
+					if length >= self._min_overlap and cost <= length * max_error_rate and (matches > best.matches or (matches == best.matches and cost < best.cost)):
+						# update
+						best.matches = matches
+						best.cost = cost
+						best.origin = column[m].origin
+						best.ref_stop = m
+						best.query_stop = j
+						if cost == 0 and matches == m:
+							# exact match, stop early
+							break
+				# column finished
+
+		if max_n == n:
+			first_i = 0 if stop_in_ref else m
+			# search in last column # TODO last?
+			for i in range(first_i, m+1):
+				length = i + min(column[i].origin, 0)
+				cost = column[i].cost
+				matches = column[i].matches
+				if length >= self._min_overlap and cost <= length * max_error_rate and (matches > best.matches or (matches == best.matches and cost < best.cost)):
+					# update best
+					best.matches = matches
+					best.cost = cost
+					best.origin = column[i].origin
+					best.ref_stop = i
+					best.query_stop = n
+		if best.cost == m + n:
+			# best.cost was initialized with this value.
+			# If it is unchanged, no alignment was found that has
+			# an error rate within the allowed range.
+			return None
+
+		cdef int start1, start2
+		if best.origin >= 0:
+			start1 = 0
+			start2 = best.origin
+		else:
+			start1 = -best.origin
+			start2 = 0
+
+		assert best.ref_stop - start1 > 0  # Do not return empty alignments.
+		return (start1, best.ref_stop, start2, best.query_stop, best.matches, best.cost)
+
+	def __dealloc__(self):
+		PyMem_Free(self.column)
+
+
+def locate(str reference, str query, double max_error_rate, int flags=SEMIGLOBAL, bint wildcard_ref=False, bint wildcard_query=False, int min_overlap=1):
+	aligner = Aligner(reference, max_error_rate, flags, wildcard_ref, wildcard_query)
+	aligner.min_overlap = min_overlap
+	return aligner.locate(query)
+
+
+def compare_prefixes(str ref, str query, bint wildcard_ref=False, bint wildcard_query=False):
+	"""
+	Find out whether one string is the prefix of the other one, allowing
+	IUPAC wildcards in ref and/or query if the appropriate flag is set.
+
+	This is used to find an anchored 5' adapter (type 'FRONT') in the 'no indels' mode.
+	This is very simple as only the number of errors needs to be counted.
+
+	This function returns a tuple compatible with what Aligner.locate outputs.
+	"""
+	cdef int m = len(ref)
+	cdef int n = len(query)
+	cdef bytes query_bytes = query.encode('ascii')
+	cdef bytes ref_bytes = ref.encode('ascii')
+	cdef char* r_ptr
+	cdef char* q_ptr
+	cdef int length = min(m, n)
+	cdef int i, matches = 0
+	cdef bint compare_ascii = False
+
+	if wildcard_ref:
+		ref_bytes = ref_bytes.translate(IUPAC_TABLE)
+	elif wildcard_query:
+		ref_bytes = ref_bytes.translate(ACGT_TABLE)
+	else:
+		compare_ascii = True
+	if wildcard_query:
+		query_bytes = query_bytes.translate(IUPAC_TABLE)
+	elif wildcard_ref:
+		query_bytes = query_bytes.translate(ACGT_TABLE)
+
+	if compare_ascii:
+		for i in range(length):
+			if ref[i] == query[i]:
+				matches += 1
+	else:
+		r_ptr = ref_bytes
+		q_ptr = query_bytes
+		for i in range(length):
+			if (r_ptr[i] & q_ptr[i]) != 0:
+				matches += 1
+
+	# length - matches = no. of errors
+	return (0, length, 0, length, matches, length - matches)
diff --git a/cutadapt/_qualtrim.pyx b/cutadapt/_qualtrim.pyx
new file mode 100644
index 0000000..3bd88c7
--- /dev/null
+++ b/cutadapt/_qualtrim.pyx
@@ -0,0 +1,84 @@
+# kate: syntax Python;
+# cython: profile=False, emit_code_comments=False
+"""
+Quality trimming.
+"""
+
+def quality_trim_index(str qualities, int cutoff_front, int cutoff_back, int base=33):
+	"""
+	Find the positions at which to trim low-quality ends from a nucleotide sequence.
+	Return tuple (start, stop) that indicates the good-quality segment.
+
+	Qualities are assumed to be ASCII-encoded as chr(qual + base).
+
+	The algorithm is the same as the one used by BWA within the function
+	'bwa_trim_read':
+	- Subtract the cutoff value from all qualities.
+	- Compute partial sums from all indices to the end of the sequence.
+	- Trim sequence at the index at which the sum is minimal.
+	"""
+	cdef int s
+	cdef int max_qual
+	cdef int stop = len(qualities)
+	cdef int start = 0
+	cdef int i
+
+	# find trim position for 5' end
+	s = 0
+	max_qual = 0
+	for i in range(len(qualities)):
+		s += cutoff_front - (ord(qualities[i]) - base)
+		if s < 0:
+			break
+		if s > max_qual:
+			max_qual = s
+			start = i + 1
+
+	# same for 3' end
+	max_qual = 0
+	s = 0
+	for i in reversed(xrange(len(qualities))):
+		s += cutoff_back - (ord(qualities[i]) - base)
+		if s < 0:
+			break
+		if s > max_qual:
+			max_qual = s
+			stop = i
+	if start >= stop:
+		start, stop = 0, 0
+	return (start, stop)
+
+
+def nextseq_trim_index(sequence, int cutoff, int base=33):
+	"""
+	Variant of the above quality trimming routine that works on NextSeq data.
+	With Illumina NextSeq, bases are encoded with two colors. 'No color' (a
+	dark cycle) usually means that a 'G' was sequenced, but that also occurs
+	when sequencing falls off the end of the fragment. The read then contains
+	a run of high-quality G bases in the end.
+
+	This routine works as the one above, but counts qualities belonging to 'G'
+	bases as being equal to cutoff - 1.
+	"""
+	bases = sequence.sequence
+	qualities = sequence.qualities
+	cdef:
+		int s = 0
+		int max_qual = 0
+		int max_i = len(qualities)
+		int i, q
+
+	s = 0
+	max_qual = 0
+	max_i = len(qualities)
+	for i in reversed(xrange(max_i)):
+		q = ord(qualities[i]) - base
+		if bases[i] == 'G':
+			q = cutoff - 1
+		s += cutoff - q
+		if s < 0:
+			break
+		if s > max_qual:
+			max_qual = s
+			max_i = i
+	return max_i
diff --git a/cutadapt/_seqio.pyx b/cutadapt/_seqio.pyx
new file mode 100644
index 0000000..b687c0b
--- /dev/null
+++ b/cutadapt/_seqio.pyx
@@ -0,0 +1,138 @@
+# kate: syntax Python;
+# cython: profile=False, emit_code_comments=False
+from __future__ import print_function, division, absolute_import
+from .xopen import xopen
+from .seqio import _shorten, FormatError, SequenceReader
+
+
+cdef class Sequence(object):
+	"""
+	A record in a FASTQ file. Also used for FASTA (then the qualities attribute
+	is None). qualities is a string and it contains the qualities encoded as
+	ascii(qual+33).
+
+	If an adapter has been matched to the sequence, the 'match' attribute is
+	set to the corresponding Match instance.
+	"""
+	cdef:
+		public str name
+		public str sequence
+		public str qualities
+		public str name2
+		public object match
+		public object match_info
+
+	def __init__(self, str name, str sequence, str qualities=None, str name2='', match=None,
+				 match_info=None):
+		"""Set qualities to None if there are no quality values"""
+		self.name = name
+		self.sequence = sequence
+		self.qualities = qualities
+		self.name2 = name2
+		self.match = match
+		self.match_info = match_info
+		if qualities is not None and len(qualities) != len(sequence):
+			rname = _shorten(name)
+			raise FormatError("In read named {0!r}: length of quality sequence ({1}) and length "
+				"of read ({2}) do not match".format(
+					rname, len(qualities), len(sequence)))
+	
+	def __getitem__(self, key):
+		"""slicing"""
+		return self.__class__(
+			self.name,
+			self.sequence[key],
+			self.qualities[key] if self.qualities is not None else None,
+			self.name2,
+			self.match,
+			self.match_info)
+
+	def __repr__(self):
+		qstr = ''
+		if self.qualities is not None:
+			qstr = ', qualities={0!r}'.format(_shorten(self.qualities))
+		return '<Sequence(name={0!r}, sequence={1!r}{2})>'.format(_shorten(self.name), _shorten(self.sequence), qstr)
+
+	def __len__(self):
+		return len(self.sequence)
+
+	def __richcmp__(self, other, int op):
+		if 2 <= op <= 3:
+			eq = self.name == other.name and \
+				self.sequence == other.sequence and \
+				self.qualities == other.qualities
+			if op == 2:
+				return eq
+			else:
+				return not eq
+		else:
+			raise NotImplementedError()
+
+	def __reduce__(self):
+		return (Sequence, (self.name, self.sequence, self.qualities, self.name2))
+
+
+class FastqReader(SequenceReader):
+	"""
+	Reader for FASTQ files. Does not support multi-line FASTQ files.
+	"""
+	def __init__(self, file, sequence_class=Sequence):
+		"""
+		file is a filename or a file-like object.
+		If file is a filename, then .gz files are supported.
+		"""
+		super(FastqReader, self).__init__(file)
+		self.sequence_class = sequence_class
+		self.delivers_qualities = True
+
+	def __iter__(self):
+		"""
+		Yield one sequence_class record per four input lines (none if empty).
+		"""
+		cdef int i = 0
+		cdef int strip
+		cdef str line, name, qualities, sequence, name2
+		sequence_class = self.sequence_class
+		it = iter(self._file)
+		line = next(it, None)
+		if line is None:  # empty input: end cleanly, do not leak StopIteration
+			return
+		if not (line and line[0] == '@'):
+			raise FormatError("Line {0} in FASTQ file is expected to start with '@', but found {1!r}".format(i+1, line[:10]))
+		strip = -2 if line.endswith('\r\n') else -1
+		name = line[1:strip]
+		i = 1
+		for line in it:
+			if i == 0:
+				if not (line and line[0] == '@'):
+					raise FormatError("Line {0} in FASTQ file is expected to start with '@', but found {1!r}".format(i+1, line[:10]))
+				name = line[1:strip]
+			elif i == 1:
+				sequence = line[:strip]
+			elif i == 2:
+				if line == '+\n':  # check most common case first
+					name2 = ''
+				else:
+					line = line[:strip]
+					if not (line and line[0] == '+'):
+						raise FormatError("Line {0} in FASTQ file is expected to start with '+', but found {1!r}".format(i+1, line[:10]))
+					if len(line) > 1:
+						if not line[1:] == name:
+							raise FormatError(
+								"At line {0}: Sequence descriptions in the FASTQ file don't match "
+								"({1!r} != {2!r}).\n"
+								"The second sequence description must be either empty "
+								"or equal to the first description.".format(i+1,
+									name, line[1:]))
+						name2 = name
+					else:
+						name2 = ''
+			elif i == 3:
+				if len(line) == len(sequence) - strip:
+					qualities = line[:strip]
+				else:
+					qualities = line.rstrip('\r\n')
+				yield sequence_class(name, sequence, qualities, name2=name2)
+			i = (i + 1) % 4
+		if i != 0:
+			raise FormatError("FASTQ file ended prematurely")
diff --git a/cutadapt/adapters.py b/cutadapt/adapters.py
new file mode 100644
index 0000000..f629c8f
--- /dev/null
+++ b/cutadapt/adapters.py
@@ -0,0 +1,569 @@
+# coding: utf-8
+"""
+Adapters
+"""
+from __future__ import print_function, division, absolute_import
+import sys
+import re
+from collections import defaultdict
+from cutadapt import align, colorspace
+from cutadapt.seqio import ColorspaceSequence, FastaReader
+
+# Constants for the find_best_alignment function.
+# The function is called with SEQ1 as the adapter, SEQ2 as the read.
+# TODO get rid of those constants, use strings instead
+BACK = align.START_WITHIN_SEQ2 | align.STOP_WITHIN_SEQ2 | align.STOP_WITHIN_SEQ1
+FRONT = align.START_WITHIN_SEQ2 | align.STOP_WITHIN_SEQ2 | align.START_WITHIN_SEQ1
+PREFIX = align.STOP_WITHIN_SEQ2
+SUFFIX = align.START_WITHIN_SEQ2
+ANYWHERE = align.SEMIGLOBAL
+LINKED = 'linked'
+
+
+def parse_braces(sequence):
+	"""
+	Replace all occurrences of ``x{n}`` (where x is any character) with n
+	occurrences of x. Raise ValueError if the expression cannot be parsed.
+
+	>>> parse_braces('TGA{5}CT')
+	'TGAAAAACT'
+	"""
+	# Simple DFA with four states, encoded in prev: None, a text chunk, '{' or the int repeat count
+	result = ''
+	prev = None
+	for s in re.split(r'(\{|\})', sequence):  # raw string: the braces are escaped for re, not for Python
+		if s == '':
+			continue
+		if prev is None:
+			if s == '{':
+				raise ValueError('"{" must be used after a character')
+			if s == '}':
+				raise ValueError('"}" cannot be used here')
+			prev = s
+			result += s
+		elif prev == '{':
+			prev = int(s)  # a non-numeric count raises ValueError here
+			if not 0 <= prev <= 10000:
+				raise ValueError('Value {0} invalid'.format(prev))
+		elif isinstance(prev, int):
+			if s != '}':
+				raise ValueError('"}" expected')
+			result = result[:-1] + result[-1] * prev  # repeat only the character right before the brace
+			prev = None
+		else:
+			if s != '{':
+				raise ValueError('Expected "{"')
+			prev = '{'
+	# Check if we are in a non-terminating state
+	if isinstance(prev, int) or prev == '{':
+		raise ValueError("Unterminated expression")
+	return result
+
+
+class AdapterParser(object):
+	"""
+	Factory for Adapter classes that all use the same parameters (error rate,
+	indels etc.). The given **kwargs will be passed to the Adapter constructors.
+	"""
+	def __init__(self, colorspace=False, **kwargs):
+		self.colorspace = colorspace
+		self.constructor_args = kwargs
+		self.adapter_class = ColorspaceAdapter if colorspace else Adapter
+
+	def parse(self, spec, name=None, cmdline_type='back'):
+		"""
+		Parse an adapter specification not using ``file:`` notation and return
+		an object of an appropriate Adapter class. The notation for anchored
+		5' and 3' adapters is supported. If the name parameter is None, then
+		an attempt is made to extract the name from the specification
+		(If spec is 'name=ADAPTER', name will be 'name'.)
+
+		cmdline_type -- describes which commandline parameter was used (``-a``
+		is 'back', ``-b`` is 'anywhere', and ``-g`` is 'front').
+		"""
+		if name is None:
+			name, spec = self._extract_name(spec)
+		sequence = spec
+		types = dict(back=BACK, front=FRONT, anywhere=ANYWHERE)
+		if cmdline_type not in types:
+			raise ValueError('cmdline_type cannot be {0!r}'.format(cmdline_type))
+		where = types[cmdline_type]
+		if where == FRONT and spec.startswith('^'):  # -g ^ADAPTER
+			sequence, where = spec[1:], PREFIX
+		elif where == BACK:
+			sequence1, middle, sequence2 = spec.partition('...')
+			if middle == '...':
+				if not sequence1:  # -a ...ADAPTER
+					sequence = sequence2  # sequence1 is empty here; the adapter is the part after the dots
+				elif not sequence2:  # -a ADAPTER...
+					sequence, where = spec[:-3], PREFIX
+				else:  # -a ADAPTER1...ADAPTER2
+					if self.colorspace:
+						raise NotImplementedError('Using linked adapters in colorspace is not supported')
+					if sequence1.startswith('^') or sequence2.endswith('$'):
+						raise NotImplementedError('Using "$" or "^" when '
+							'specifying a linked adapter is not supported')
+					return LinkedAdapter(sequence1, sequence2, name=name,
+						**self.constructor_args)
+			elif spec.endswith('$'):   # -a ADAPTER$
+				sequence, where = spec[:-1], SUFFIX
+		if not sequence:
+			raise ValueError("The adapter sequence is empty.")
+
+		return self.adapter_class(sequence, where, name=name, **self.constructor_args)
+
+	def parse_with_file(self, spec, cmdline_type='back'):
+		"""
+		Parse an adapter specification and yield appropriate Adapter classes.
+		This works like the parse() function above, but also supports the
+		``file:`` notation for reading adapters from an external FASTA
+		file. Since a file can contain multiple adapters, this
+		function is a generator.
+		"""
+		if spec.startswith('file:'):
+			# read adapter sequences from a file
+			with FastaReader(spec[5:]) as fasta:
+				for record in fasta:
+					name = record.name.split(None, 1)[0]
+					yield self.parse(record.sequence, name, cmdline_type)
+		else:
+			name, spec = self._extract_name(spec)
+			yield self.parse(spec, name, cmdline_type)
+
+	def _extract_name(self, spec):
+		"""
+		Parse an adapter specification given as 'name=adapt' into 'name' and 'adapt'.
+		"""
+		fields = spec.split('=', 1)
+		if len(fields) > 1:
+			name, spec = fields
+			name = name.strip()
+		else:
+			name = None
+		spec = spec.strip()
+		return name, spec
+
+	def parse_multi(self, back, anywhere, front):
+		"""
+		Parse all three types of commandline options that can be used to
+		specify adapters. back, anywhere and front are lists of strings,
+		corresponding to the respective commandline types (-a, -b, -g).
+
+		Return a list of appropriate Adapter classes.
+		"""
+		adapters = []
+		for specs, cmdline_type in (back, 'back'), (anywhere, 'anywhere'), (front, 'front'):
+			for spec in specs:
+				adapters.extend(self.parse_with_file(spec, cmdline_type))
+		return adapters
+
+
+class Match(object):
+	"""
+	Alignment of an adapter against a read. TODO creating instances of this class is relatively slow and responsible for quite some runtime.
+	"""
+	__slots__ = ['astart', 'astop', 'rstart', 'rstop', 'matches', 'errors', 'front', 'adapter', 'read', 'length']
+	def __init__(self, astart, astop, rstart, rstop, matches, errors, front, adapter, read):
+		self.astart = astart
+		self.astop = astop
+		self.rstart = rstart
+		self.rstop = rstop
+		self.matches = matches
+		self.errors = errors
+		self.front = self._guess_is_front() if front is None else front  # front=None means: guess from rstart
+		self.adapter = adapter
+		self.read = read
+		# Number of aligned characters in the adapter. If there are
+		# indels, this may be different from the number of characters
+		# in the read.
+		self.length = self.astop - self.astart
+		assert self.length > 0
+		assert self.errors / self.length <= self.adapter.max_error_rate
+		assert self.length - self.errors > 0
+
+	def __str__(self):
+		return 'Match(astart={0}, astop={1}, rstart={2}, rstop={3}, matches={4}, errors={5})'.format(
+			self.astart, self.astop, self.rstart, self.rstop, self.matches, self.errors)
+
+	def _guess_is_front(self):
+		"""
+		Return whether this is guessed to be a front adapter.
+
+		The match is assumed to be a front adapter when the first base of
+		the read is involved in the alignment to the adapter.
+		"""
+		return self.rstart == 0
+
+	def wildcards(self, wildcard_char='N'):
+		"""
+		Return a string that contains, for each wildcard character,
+		the character that it matches. For example, if the adapter
+		ATNGNA matches ATCGTA, then the string 'CT' is returned.
+
+		If there are indels, this is not reliable as the full alignment
+		is not available.
+		"""
+		wildcards = [ self.read.sequence[self.rstart + i:self.rstart + i + 1] for i in range(self.length)
+			if self.adapter.sequence[self.astart + i] == wildcard_char and self.rstart + i < len(self.read.sequence) ]
+		return ''.join(wildcards)
+
+	def rest(self):
+		"""
+		Return the part of the read before this match if this is a
+		'front' (5') adapter,
+		return the part after the match if this is not a 'front' adapter (3').
+		This can be an empty string.
+		"""
+		if self.front:
+			return self.read.sequence[:self.rstart]
+		else:
+			return self.read.sequence[self.rstop:]
+	
+	def get_info_record(self):  # Return a tuple: read name, errors, rstart, rstop, three read segments, adapter name, three quality segments
+		seq = self.read.sequence
+		qualities = self.read.qualities
+		info = (
+			self.read.name,
+			self.errors,
+			self.rstart,
+			self.rstop,
+			seq[0:self.rstart],
+			seq[self.rstart:self.rstop],
+			seq[self.rstop:],
+			self.adapter.name
+		)
+		if qualities:
+			info += (
+				qualities[0:self.rstart],
+				qualities[self.rstart:self.rstop],
+				qualities[self.rstop:]
+			)
+		else:
+			info += ('','','')  # keep the tuple length constant when the read has no qualities
+		
+		return info
+
+def _generate_adapter_name(_start=[1]):
+	# The mutable default is shared between calls on purpose: it holds the next free number
+	_start[0] += 1
+	return str(_start[0] - 1)
+
+
+class Adapter(object):
+	"""
+	An adapter knows how to match itself to a read.
+	In particular, it knows where it should be within the read and how to interpret
+	wildcard characters.
+
+	where --  One of the BACK, FRONT, PREFIX, SUFFIX or ANYWHERE constants.
+		This influences where the adapter is allowed to appear within the
+		read and also which part of the read is removed.
+
+	sequence -- The adapter sequence as string. Will be converted to uppercase.
+		Also, Us will be converted to Ts.
+
+	max_error_rate -- Maximum allowed error rate. The error rate is
+		the number of errors in the alignment divided by the length
+		of the part of the alignment that matches the adapter.
+
+	min_overlap -- Minimum length of the part of the alignment
+		that matches the adapter.
+
+	read_wildcards -- Whether IUPAC wildcards in the read are allowed.
+	adapter_wildcards -- Whether IUPAC wildcards in the adapter are
+		allowed.
+
+	indels -- Whether insertions and deletions are allowed in the alignment.
+	name -- optional name of the adapter. If not provided, the name is set to a
+		unique number.
+	"""
+	def __init__(self, sequence, where, max_error_rate=0.1, min_overlap=3,
+			read_wildcards=False, adapter_wildcards=True, name=None, indels=True):
+		self.debug = False
+		self.name = _generate_adapter_name() if name is None else name
+		self.sequence = parse_braces(sequence.upper().replace('U', 'T'))
+		assert len(self.sequence) > 0
+		self.where = where
+		self.max_error_rate = max_error_rate
+		self.min_overlap = min(min_overlap, len(self.sequence))
+		self.indels = indels
+		self.adapter_wildcards = adapter_wildcards and not set(self.sequence) <= set('ACGT')  # off when the sequence is plain ACGT
+		self.read_wildcards = read_wildcards
+		# redirect trimmed() to appropriate function depending on adapter type
+		trimmers = {
+			FRONT: self._trimmed_front,
+			PREFIX: self._trimmed_front,
+			BACK: self._trimmed_back,
+			SUFFIX: self._trimmed_back,
+			ANYWHERE: self._trimmed_anywhere
+		}
+		self.trimmed = trimmers[where]
+		if where == ANYWHERE:
+			self._front_flag = None  # means: guess
+		else:
+			self._front_flag = where not in (BACK, SUFFIX)
+		# statistics about length of removed sequences
+		self.lengths_front = defaultdict(int)
+		self.lengths_back = defaultdict(int)
+		self.errors_front = defaultdict(lambda: defaultdict(int))
+		self.errors_back = defaultdict(lambda: defaultdict(int))
+		self.adjacent_bases = { 'A': 0, 'C': 0, 'G': 0, 'T': 0, '': 0 }
+
+		self.aligner = align.Aligner(self.sequence, self.max_error_rate,
+			flags=self.where, wildcard_ref=self.adapter_wildcards, wildcard_query=self.read_wildcards)
+		self.aligner.min_overlap = self.min_overlap
+		if not self.indels:
+			# TODO
+			# When indels are disallowed, an entirely different algorithm
+			# should be used.
+			self.aligner.indel_cost = 100000
+
+	def __repr__(self):
+		return '<Adapter(name="{name}", sequence="{sequence}", where={where}, '\
+			'max_error_rate={max_error_rate}, min_overlap={min_overlap}, '\
+			'read_wildcards={read_wildcards}, '\
+			'adapter_wildcards={adapter_wildcards}, '\
+			'indels={indels})>'.format(**vars(self))
+
+	def enable_debug(self):
+		"""
+		Print out the dynamic programming matrix after matching a read to an
+		adapter.
+		"""
+		self.debug = True
+		self.aligner.enable_debug()
+
+	def match_to(self, read):
+		"""
+		Attempt to match this adapter to the given read.
+
+		Return a Match instance if a match was found;
+		return None if no match was found given the matching criteria (minimum
+		overlap length, maximum error rate).
+		"""
+		read_seq = read.sequence.upper()
+		pos = -1
+		# try to find an exact match first unless wildcards are allowed
+		if not self.adapter_wildcards:
+			if self.where == PREFIX:
+				pos = 0 if read_seq.startswith(self.sequence) else -1
+			elif self.where == SUFFIX:
+				pos = (len(read_seq) - len(self.sequence)) if read_seq.endswith(self.sequence) else -1
+			else:
+				pos = read_seq.find(self.sequence)
+		if pos >= 0:
+			match = Match(
+				0, len(self.sequence), pos, pos + len(self.sequence),
+				len(self.sequence), 0, self._front_flag, self, read)
+		else:
+			# try approximate matching
+			if not self.indels and self.where in (PREFIX, SUFFIX):
+				if self.where == PREFIX:
+					alignment = align.compare_prefixes(self.sequence, read_seq,
+						wildcard_ref=self.adapter_wildcards, wildcard_query=self.read_wildcards)
+				else:
+					alignment = align.compare_suffixes(self.sequence, read_seq,
+						wildcard_ref=self.adapter_wildcards, wildcard_query=self.read_wildcards)
+				astart, astop, rstart, rstop, matches, errors = alignment
+				if astop - astart >= self.min_overlap and errors / (astop - astart) <= self.max_error_rate:
+					match = Match(*(alignment + (self._front_flag, self, read)))
+				else:
+					match = None
+			else:
+				alignment = self.aligner.locate(read_seq)
+				if self.debug:
+					print(self.aligner.dpmatrix)  # pragma: no cover
+				if alignment is None:
+					match = None
+				else:
+					astart, astop, rstart, rstop, matches, errors = alignment
+					match = Match(astart, astop, rstart, rstop, matches, errors, self._front_flag, self, read)
+
+		if match is None:
+			return None
+		assert match.length > 0 and match.errors / match.length <= self.max_error_rate, match
+		assert match.length >= self.min_overlap
+		return match
+
+	def _trimmed_anywhere(self, match):
+		"""Return a trimmed read"""
+		if match.front:
+			return self._trimmed_front(match)
+		else:
+			return self._trimmed_back(match)
+
+	def _trimmed_front(self, match):
+		"""Return a trimmed read"""
+		# TODO move away
+		self.lengths_front[match.rstop] += 1
+		self.errors_front[match.rstop][match.errors] += 1
+		return match.read[match.rstop:]
+
+	def _trimmed_back(self, match):
+		"""Return a trimmed read without the 3' (back) adapter"""
+		# TODO move away
+		self.lengths_back[len(match.read) - match.rstart] += 1
+		self.errors_back[len(match.read) - match.rstart][match.errors] += 1
+		adjacent_base = match.read.sequence[match.rstart-1:match.rstart]
+		if adjacent_base not in 'ACGT':
+			adjacent_base = ''
+		self.adjacent_bases[adjacent_base] += 1
+		return match.read[:match.rstart]
+
+	def __len__(self):
+		return len(self.sequence)
+
+
+class ColorspaceAdapter(Adapter):  # Adapter for colorspace reads; a sequence given as plain ACGT is converted to colors
+	def __init__(self, *args, **kwargs):
+		super(ColorspaceAdapter, self).__init__(*args, **kwargs)
+		has_nucleotide_seq = False
+		if set(self.sequence) <= set('ACGT'):
+			# adapter was given in basespace
+			self.nucleotide_sequence = self.sequence
+			has_nucleotide_seq = True
+			self.sequence = colorspace.encode(self.sequence)[1:]
+		if self.where in (PREFIX, FRONT) and not has_nucleotide_seq:
+			raise ValueError("A 5' colorspace adapter needs to be given in nucleotide space")
+		self.aligner.reference = self.sequence
+
+	def match_to(self, read):
+		"""Return a Match instance, or None if the adapter does not match the read"""
+		if self.where != PREFIX:
+			return super(ColorspaceAdapter, self).match_to(read)
+		# create artificial adapter that includes a first color that encodes the
+		# transition from primer base into adapter
+		asequence = colorspace.ENCODE[read.primer + self.nucleotide_sequence[0:1]] + self.sequence
+
+		pos = 0 if read.sequence.startswith(asequence) else -1
+		if pos >= 0:
+			match = Match(
+				0, len(asequence), pos, pos + len(asequence),
+				len(asequence), 0, self._front_flag, self, read)
+		else:
+			# try approximate matching
+			self.aligner.reference = asequence
+			alignment = self.aligner.locate(read.sequence)
+			if self.debug:
+				print(self.aligner.dpmatrix)  # pragma: no cover
+			if alignment is not None:
+				match = Match(*(alignment + (self._front_flag, self, read)))
+			else:
+				match = None
+
+		if match is None:
+			return None
+		assert match.length > 0 and match.errors / match.length <= self.max_error_rate
+		assert match.length >= self.min_overlap
+		return match
+
+	def _trimmed_front(self, match):
+		"""Return the read without the 5' adapter; the first remaining color is re-encoded against the primer base"""
+		read = match.read
+		self.lengths_front[match.rstop] += 1
+		self.errors_front[match.rstop][match.errors] += 1
+		# to remove a front adapter, we need to re-encode the first color following the adapter match
+		color_after_adapter = read.sequence[match.rstop:match.rstop + 1]
+		if not color_after_adapter:
+			# the read is empty
+			return read[match.rstop:]
+		base_after_adapter = colorspace.DECODE[self.nucleotide_sequence[-1:] + color_after_adapter]
+		new_first_color = colorspace.ENCODE[read.primer + base_after_adapter]
+		new_read = read[:]
+		new_read.sequence = new_first_color + read.sequence[(match.rstop + 1):]
+		new_read.qualities = read.qualities[match.rstop:] if read.qualities else None
+		return new_read
+
+	def _trimmed_back(self, match):
+		"""Return the read without the 3' adapter and without the color directly before it"""
+		# trim one more color if long enough
+		adjusted_rstart = max(match.rstart - 1, 0)
+		self.lengths_back[len(match.read) - adjusted_rstart] += 1
+		self.errors_back[len(match.read) - adjusted_rstart][match.errors] += 1
+		return match.read[:adjusted_rstart]
+
+	def __repr__(self):
+		return '<ColorspaceAdapter(sequence={0!r}, where={1})>'.format(self.sequence, self.where)
+
+
+class LinkedMatch(object):
+	"""
+	Result of matching a LinkedAdapter: the front match plus an optional back match.
+
+	TODO
+	It shouldn’t be necessary to have both a Match and a LinkedMatch class.
+	"""
+	def __init__(self, front_match, back_match, adapter):
+		assert front_match is not None
+		self.adapter = adapter
+		self.front_match = front_match
+		self.back_match = back_match
+
+
+class LinkedAdapter(object):
+	"""A front adapter and a back adapter that are matched one after the other; without a front match there is no match.
+	"""
+	def __init__(self, front_sequence, back_sequence,
+			front_anchored=True, back_anchored=False, name=None, **kwargs):
+		"""
+		kwargs are passed on to individual Adapter constructors
+		"""
+		assert front_anchored and not back_anchored
+		where1 = PREFIX if front_anchored else FRONT
+		where2 = SUFFIX if back_anchored else BACK
+		self.front_anchored = front_anchored
+		self.back_anchored = back_anchored
+
+		# The following attributes are needed for the report
+		self.where = LINKED
+		self.name = _generate_adapter_name() if name is None else name
+		self.front_adapter = Adapter(front_sequence, where=where1, name=None, **kwargs)
+		self.back_adapter = Adapter(back_sequence, where=where2, name=None, **kwargs)
+
+	def enable_debug(self):
+		self.front_adapter.enable_debug()
+		self.back_adapter.enable_debug()
+
+	def match_to(self, read):
+		"""
+		Match the linked adapters against the given read. If the 'front' adapter
+		is not found, the 'back' adapter is not searched for.
+		"""
+		front_match = self.front_adapter.match_to(read)
+		if front_match is None:
+			return None
+		# TODO use match.trimmed() instead as soon as that does not update
+		# statistics anymore
+		read = read[front_match.rstop:]
+		back_match = self.back_adapter.match_to(read)
+		return LinkedMatch(front_match, back_match, self)
+
+	def trimmed(self, match):  # match is a LinkedMatch; the front adapter's trimmed() is always called
+		front_trimmed = self.front_adapter.trimmed(match.front_match)
+		if match.back_match:
+			return self.back_adapter.trimmed(match.back_match)
+		else:
+			return front_trimmed
+
+	# Lots of forwarders (needed for the report). I’m sure this can be done
+	# in a better way.
+
+	@property
+	def lengths_front(self):
+		return self.front_adapter.lengths_front
+
+	@property
+	def lengths_back(self):
+		return self.back_adapter.lengths_back
+
+	@property
+	def errors_front(self):
+		return self.front_adapter.errors_front
+
+	@property
+	def errors_back(self):
+		return self.back_adapter.errors_back
+
+	@property
+	def adjacent_bases(self):
+		return self.back_adapter.adjacent_bases
diff --git a/cutadapt/align.py b/cutadapt/align.py
new file mode 100644
index 0000000..aabd208
--- /dev/null
+++ b/cutadapt/align.py
@@ -0,0 +1,35 @@
+# coding: utf-8
+"""
+Alignment module.
+"""
+from __future__ import print_function, division, absolute_import
+
+from cutadapt._align import Aligner, compare_prefixes, locate
+
+# flags for global alignment
+
+# The interpretation of the first flag is:
+# An initial portion of seq1 may be skipped at no cost.
+# This is equivalent to saying that in the alignment,
+# gaps in the beginning of seq2 are free.
+#
+# The other flags have an equivalent meaning.
+START_WITHIN_SEQ1 = 1
+START_WITHIN_SEQ2 = 2
+STOP_WITHIN_SEQ1 = 4
+STOP_WITHIN_SEQ2 = 8
+
+# Use this to get regular semiglobal alignment
+# (all gaps in the beginning or end are free)
+SEMIGLOBAL = START_WITHIN_SEQ1 | START_WITHIN_SEQ2 | STOP_WITHIN_SEQ1 | STOP_WITHIN_SEQ2
+
+
+def compare_suffixes(s1, s2, wildcard_ref=False, wildcard_query=False):
+	"""
+	Compare the ends of two strings by comparing the prefixes of their reversals;
+	mismatches are allowed. Used for an anchored 3' adapter when indels are off.
+	"""
+	rev1, rev2 = s1[::-1], s2[::-1]
+	_, length, _, _, matches, errors = compare_prefixes(rev1, rev2, wildcard_ref, wildcard_query)
+	n1, n2 = len(s1), len(s2)
+	return (n1 - length, n1, n2 - length, n2, matches, errors)
diff --git a/cutadapt/colorspace.py b/cutadapt/colorspace.py
new file mode 100644
index 0000000..4512941
--- /dev/null
+++ b/cutadapt/colorspace.py
@@ -0,0 +1,83 @@
+# coding: utf-8
+"""
+Colorspace conversion routines.
+
+Inspired by agapython/util/Dibase.py from Corona lite,
+but reimplemented to avoid licensing issues.
+
+Encoding Table
+
+  A C G T
+A 0 1 2 3
+C 1 0 3 2
+G 2 3 0 1
+T 3 2 1 0
+"""
+from __future__ import print_function, division, absolute_import
+
+__author__ = 'Marcel Martin'
+
+
+def _initialize_dicts():
+	"""
+	Build and return the (encoding, decoding) lookup tables for colorspace.
+	"""
+	bases = "ACGT"
+	unknown = 'N.'
+	encode_table = {}
+	decode_table = {}
+	for i, first in enumerate(bases):
+		# The color of a nucleotide pair is the XOR of the two base indices
+		for j, second in enumerate(bases):
+			color = str(i ^ j)
+			encode_table[first + second] = color
+			decode_table[first + color] = second
+		for u in unknown:
+			# A pair with an unknown base is encoded as color 4 ...
+			encode_table[u + first] = '4'
+			encode_table[first + u] = '4'
+			# ... and a color after an unknown base is decoded as N
+			decode_table[u + str(i)] = 'N'
+		# Color 4 and '.' after a known base are decoded as N, too
+		decode_table[first + '4'] = 'N'
+		decode_table[first + '.'] = 'N'
+	for a in unknown:
+		encode_table.update((a + b, '4') for b in unknown)
+	decode_table['N4'] = 'N'
+	return (encode_table, decode_table)
+
+
+def encode(s):
+	"""
+	Given a sequence of nucleotides, convert them to
+	colorspace. Only uppercase characters are allowed.
+	>>> encode("ACGGTC")
+	'A13012'
+	"""
+	if not s:
+		return s
+	# The first nucleotide is kept as it is; each further character
+	# is the color of one pair of adjacent nucleotides.
+	colors = [ENCODE[s[i:i+2]] for i in range(len(s) - 1)]
+	return s[0:1] + ''.join(colors)
+
+
+def decode(s):
+	"""
+	Decode a sequence of colors to nucleotide space.
+	The first character in s must be a nucleotide.
+	Only uppercase characters are allowed.
+	>>> decode("A13012")
+	'ACGGTC'
+	"""
+	if len(s) < 2:
+		return s
+	x = s[0]
+	result = [x]
+	for c in s[1:]:
+		x = DECODE[x + c]  # previous nucleotide plus color gives the next nucleotide
+		result.append(x)
+	return ''.join(result)
+
+
+(ENCODE, DECODE) = _initialize_dicts()
diff --git a/cutadapt/compat.py b/cutadapt/compat.py
new file mode 100644
index 0000000..2289948
--- /dev/null
+++ b/cutadapt/compat.py
@@ -0,0 +1,45 @@
+# coding: utf-8
+"""
+Minimal Py2/Py3 compatibility library.
+"""
+from __future__ import print_function, division, absolute_import
+import sys
+PY3 = sys.version_info[0] >= 3  # compare the numeric major version, not the version string
+
+
+if PY3:
+	maketrans = str.maketrans
+	basestring = str
+	zip = zip
+	next = next
+
+	def bytes_to_str(s):
+		return s.decode('ascii')
+
+	def str_to_bytes(s):
+		return s.encode('ascii')
+
+	def force_str(s):
+		if isinstance(s, bytes):
+			return s.decode('ascii')
+		else:
+			return s
+	from io import StringIO
+
+else:
+	def bytes_to_str(s):
+		return s
+
+	def str_to_bytes(s):
+		return s
+
+	def force_str(s):
+		return s
+
+	def next(it):
+		return it.next()
+
+	from string import maketrans
+	basestring = basestring
+	from itertools import izip as zip
+	from StringIO import StringIO
diff --git a/cutadapt/filters.py b/cutadapt/filters.py
new file mode 100644
index 0000000..3ab1e04
--- /dev/null
+++ b/cutadapt/filters.py
@@ -0,0 +1,256 @@
+# coding: utf-8
+"""
+Classes for writing and filtering of processed reads.
+
+A Filter is a callable that has the read as its only argument. If it is called,
+it returns True if the read should be filtered (discarded), and False if not.
+
+To be used, a filter needs to be wrapped in one of the redirector classes.
+They are called so because they can redirect filtered reads to a file if so
+desired. They also keep statistics.
+
+To determine what happens to a read, a list of redirectors with different
+filters is created and each redirector is called in turn until one returns True.
+The read is then assumed to have been "consumed", that is, either written
+somewhere or filtered (should be discarded).
+"""
+from __future__ import print_function, division, absolute_import
+from .xopen import xopen
+from . import seqio
+
+# Constants used when returning from a Filter’s __call__ method to improve
+# readability (it is unintuitive that "return True" means "discard the read").
+DISCARD = True
+KEEP = False
+
+
+class NoFilter(object):
+	"""
+	Pass-through stage: each read is handed to the writer and reported as consumed.
+	"""
+	def __init__(self, writer):
+		self.writer = writer
+		self.filter = filter  # NOTE(review): there is no 'filter' argument here, so this stores the builtin
+		self.filtered = 0
+		self.written = 0  # no of written reads  TODO move to writer
+		self.written_bp = [0, 0]
+
+	def __call__(self, read):
+		self.writer.write(read)
+		self.written = self.written + 1
+		self.written_bp[0] = self.written_bp[0] + len(read)
+		return DISCARD
+
+
+class PairedNoFilter(object):
+	"""
+	Pass-through stage for paired-end data: each pair is handed to the writer and reported as consumed.
+	"""
+	def __init__(self, writer):
+		self.writer = writer
+		self.filtered = 0
+		self.written = 0  # no of written reads or read pairs  TODO move to writer
+		self.written_bp = [0, 0]
+
+	def __call__(self, read1, read2):
+		self.writer.write(read1, read2)
+		self.written += 1
+		for index, read in enumerate((read1, read2)):
+			self.written_bp[index] += len(read)
+		return DISCARD
+
+
+class Redirector(object):
+	"""
+	Count reads flagged by the filter and send them to the writer, if there is one. This is for single-end reads.
+	"""
+	def __init__(self, writer, filter):
+		self.writer = writer
+		self.filter = filter
+		self.filtered = 0
+		self.written = 0  # no of written reads  TODO move to writer
+		self.written_bp = [0, 0]
+
+	def __call__(self, read):
+		if not self.filter(read):
+			return KEEP
+		self.filtered += 1
+		if self.writer is not None:
+			self.writer.write(read)
+			self.written += 1
+			self.written_bp[0] += len(read)
+		return DISCARD
+
+
+class PairedRedirector(object):
+	"""
+	Paired-end counterpart of the single-end redirector, using the
+	'new-style' filtering: the filter is applied to both reads, and the
+	whole pair is discarded (and sent to the writer, if there is one) when
+	at least min_affected of the two reads match the filtering criteria.
+	"""
+	def __init__(self, writer, filter, min_affected=1):
+		"""
+		min_affected -- how many reads of a pair must match; 1 or 2.
+			1 means: the pair is discarded if any read matches
+			2 means: the pair is discarded if both reads match
+		"""
+		if min_affected not in (1, 2):
+			raise ValueError("min_affected must be 1 or 2")
+		self.writer = writer
+		self.filter = filter
+		self._min_affected = min_affected
+		self.filtered = 0
+		self.written = 0  # no of written reads or read pairs  TODO move to writer
+		self.written_bp = [0, 0]
+
+	def __call__(self, read1, read2):
+		affected = self.filter(read1) + self.filter(read2)
+		if affected < self._min_affected:
+			return KEEP
+		self.filtered += 1
+		if self.writer is not None:
+			self.writer.write(read1, read2)
+			self.written += 1
+			for index, read in enumerate((read1, read2)):
+				self.written_bp[index] += len(read)
+		return DISCARD
+
+
+class LegacyPairedRedirector(object):
+	"""
+	Paired-end redirector for the 'legacy' filtering mode (backwards
+	compatibility): the pair is discarded, and sent to the writer if there
+	is one, when the first read matches the filtering criteria. The
+	second read is not inspected.
+	"""
+	def __init__(self, writer, filter):
+		self.writer = writer
+		self.filter = filter
+		self.filtered = 0
+		self.written = 0  # no of written reads or read pairs  TODO move to writer
+		self.written_bp = [0, 0]
+
+	def __call__(self, read1, read2):
+		# only the first read decides; read2 is never passed to the filter
+		if not self.filter(read1):
+			return KEEP
+		self.filtered += 1
+		if self.writer is not None:
+			self.writer.write(read1, read2)
+			self.written += 1
+			for index, read in enumerate((read1, read2)):
+				self.written_bp[index] += len(read)
+		return DISCARD
+
+
+class TooShortReadFilter(object):
+	# TODO paired_outfile is left at its default value None (read2 is silently discarded)
+	def __init__(self, minimum_length):
+		self.minimum_length = minimum_length  # reads shorter than this are flagged
+
+	def __call__(self, read):
+		return self.minimum_length > len(read)
+
+
+class TooLongReadFilter(object):
+	def __init__(self, maximum_length):
+		self.maximum_length = maximum_length  # reads longer than this are flagged
+
+	def __call__(self, read):
+		return self.maximum_length < len(read)
+
+
+class NContentFilter(object):
+	"""
+	Flags a read for discarding when it has more 'N's than a given threshold. The threshold is either
+	a raw count of Ns or a proportion of the read length. Note, for raw counts, it is a greater
+	than comparison, so a cutoff of '1' will keep reads with a single N in it.
+	"""
+	def __init__(self, count):
+		"""
+		count -- a value below 1.0 is treated as a proportion of the read length; a value of 1 or
+		more is treated as the number of N's a read may contain before it is discarded.
+		"""
+		assert count >= 0
+		self.cutoff = count
+		self.is_proportion = count < 1.0
+
+	def __call__(self, read):
+		"""Tell whether the read exceeds the N cutoff (True means discard)"""
+		n_count = read.sequence.lower().count('n')
+		if not self.is_proportion:
+			return n_count > self.cutoff
+		length = len(read)
+		if length == 0:
+			return False
+		return n_count / length > self.cutoff
+
+
+class DiscardUntrimmedFilter(object):
+	"""Return True if the read is untrimmed, that is, if its match attribute is None."""
+	def __call__(self, read):
+		match = read.match
+		untrimmed = match is None
+		return untrimmed
+
+
+class DiscardTrimmedFilter(object):
+	"""Return True if the read is trimmed, that is, if its match attribute is not None."""
+	def __call__(self, read):
+		match = read.match
+		trimmed = match is not None
+		return trimmed
+
+
+class Demultiplexer(object):
+	"""
+	Demultiplex trimmed reads. Reads are written to different output files
+	depending on which adapter matches. Files are created when the first read
+	is written to them.
+	"""
+	def __init__(self, path_template, untrimmed_path, colorspace, qualities):
+		"""
+		path_template must contain the string '{name}', which will be replaced
+		with the name of the adapter to form the final output path.
+		Reads without an adapter match are written to the file named by
+		untrimmed_path.
+		"""
+		assert '{name}' in path_template
+		self.template = path_template
+		self.untrimmed_path = untrimmed_path
+		self.untrimmed_writer = None  # opened on first use, and only if untrimmed_path is not None
+		self.writers = dict()  # adapter name -> writer, filled on first use
+		self.written = 0
+		self.written_bp = [0, 0]
+		self.colorspace = colorspace
+		self.qualities = qualities
+
+	def __call__(self, read1, read2=None):  # returns DISCARD for single-end reads; paired-end input is not supported
+		if read2 is None:
+			# single-end read
+			if read1.match is None:
+				if self.untrimmed_writer is None and self.untrimmed_path is not None:
+					self.untrimmed_writer = seqio.open(self.untrimmed_path,
+						mode='w', colorspace=self.colorspace, qualities=self.qualities)
+				if self.untrimmed_writer is not None:
+					self.written += 1
+					self.written_bp[0] += len(read1)
+					self.untrimmed_writer.write(read1)
+			else:
+				name = read1.match.adapter.name
+				if name not in self.writers:
+					self.writers[name] = seqio.open(self.template.replace('{name}', name),
+						mode='w', colorspace=self.colorspace, qualities=self.qualities)
+				self.written += 1
+				self.written_bp[0] += len(read1)
+				self.writers[name].write(read1)
+			return DISCARD
+		else:
+			assert False, "Not supported"  # pragma: no cover
+
+	def close(self):  # close every writer that has been opened so far
+		for w in self.writers.values():
+			w.close()
+		if self.untrimmed_writer is not None:
+			self.untrimmed_writer.close()
diff --git a/cutadapt/modifiers.py b/cutadapt/modifiers.py
new file mode 100644
index 0000000..af4944d
--- /dev/null
+++ b/cutadapt/modifiers.py
@@ -0,0 +1,275 @@
+# coding: utf-8
+"""
+This module implements all the read modifications that cutadapt supports.
+A modifier must be callable. It is implemented as a function if no parameters
+need to be stored, and as a class with a __call__ method if there are parameters
+(or statistics).
+"""
+from __future__ import print_function, division, absolute_import
+import re
+from cutadapt.qualtrim import quality_trim_index, nextseq_trim_index
+from cutadapt.compat import maketrans
+
+
+class AdapterCutter(object):
+	"""
+	Repeatedly find one of multiple adapters in reads.
+	The number of times the search is repeated is specified by the
+	times parameter.
+	"""
+
+	def __init__(self, adapters, times=1, wildcard_file=None, info_file=None,
+			rest_writer=None, action='trim'):
+		"""
+		adapters -- list of Adapter objects
+
+		action -- What to do with a found adapter: None, 'trim', or 'mask'
+		"""
+		self.adapters = adapters
+		self.times = times
+		self.wildcard_file = wildcard_file
+		self.info_file = info_file
+		self.rest_writer = rest_writer
+		self.action = action
+		self.with_adapters = 0
+		self.keep_match_info = self.info_file is not None
+
+	def _best_match(self, read):
+		"""
+		Find the best matching adapter in the given read.
+
+		Return either a Match instance or None if there are no matches.
+		"""
+		best = None
+		for adapter in self.adapters:
+			match = adapter.match_to(read)
+			if match is None:
+				continue
+
+			# the no. of matches determines which adapter fits best
+			if best is None or match.matches > best.matches:
+				best = match
+		return best
+
+	def _write_info(self, read):
+		"""
+		Write to the info, wildcard and rest files.
+		# TODO
+		# This design with a read having a .match attribute and
+		# a match having a .read attribute is really confusing.
+		"""
+		match = read.match
+		if self.rest_writer and match:
+			self.rest_writer.write(match)
+
+		if self.wildcard_file and match:
+			print(match.wildcards(), read.name, file=self.wildcard_file)
+
+		if self.info_file:
+			if read.match_info:
+				for m in read.match_info:
+					print(*m, sep='\t', file=self.info_file)
+			else:
+				seq = read.sequence
+				qualities = read.qualities if read.qualities is not None else ''
+				print(read.name, -1, seq, qualities, sep='\t', file=self.info_file)
+
+	def __call__(self, read):
+		"""
+		Determine the adapter that best matches the given read.
+		Since the best adapter is searched repeatedly, a list
+		of Match instances is returned, which
+		need to be applied consecutively to the read.
+		The list is empty if there are no adapter matches.
+
+		The read is converted to uppercase before it is compared to the adapter
+		sequences.
+
+		Cut found adapters from a single read. Return modified read.
+		"""
+		matches = []
+
+		# try at most self.times times to remove an adapter
+		trimmed_read = read
+		for t in range(self.times):
+			match = self._best_match(trimmed_read)
+			if match is None:
+				# nothing found
+				break
+			matches.append(match)
+			trimmed_read = match.adapter.trimmed(match)
+		
+		if not matches:
+			trimmed_read.match = None
+			trimmed_read.match_info = None
+			self._write_info(trimmed_read)
+			return trimmed_read
+		
+		if __debug__:
+			assert len(trimmed_read) < len(read), "Trimmed read isn't shorter than original"
+
+		if self.action == 'trim':
+			# read is already trimmed, nothing to do
+			pass
+		elif self.action == 'mask':
+			# add N from last modification
+			masked_sequence = trimmed_read.sequence
+			for match in sorted(matches, reverse=True, key=lambda m: m.astart):
+				ns = 'N' * (len(match.read.sequence) -
+							len(match.adapter.trimmed(match).sequence))
+				# add N depending on match position
+				if match.front:
+					masked_sequence = ns + masked_sequence
+				else:
+					masked_sequence += ns
+			# set masked sequence as sequence with original quality
+			trimmed_read.sequence = masked_sequence
+			trimmed_read.qualities = matches[0].read.qualities
+
+			assert len(trimmed_read.sequence) == len(read)
+		elif self.action is None:
+			trimmed_read = read
+		
+		trimmed_read.match = matches[-1]
+		if self.keep_match_info:
+			trimmed_read.match_info = [match.get_info_record() for match in matches]
+		self._write_info(trimmed_read)
+		
+		self.with_adapters += 1
+		return trimmed_read
+
+
+class UnconditionalCutter(object):
+	"""
+	A modifier that unconditionally removes the first n or the last n bases from a read.
+
+	If the length is positive, the bases are removed from the beginning of the read.
+	If the length is negative, the bases are removed from the end of the read.
+	"""
+	def __init__(self, length):
+		self.length = length
+
+	def __call__(self, read):
+		if self.length > 0:
+			return read[self.length:]
+		elif self.length < 0:
+			return read[:self.length]
+
+
+class LengthTagModifier(object):
+	"""
+	Replace "length=..." strings in read names.
+	"""
+	def __init__(self, length_tag):
+		self.regex = re.compile(r"\b" + length_tag + r"[0-9]*\b")
+		self.length_tag = length_tag
+
+	def __call__(self, read):
+		read = read[:]
+		if read.name.find(self.length_tag) >= 0:
+			read.name = self.regex.sub(self.length_tag + str(len(read.sequence)), read.name)
+		return read
+
+
+class SuffixRemover(object):
+	"""
+	Remove a given suffix from read names.
+	"""
+	def __init__(self, suffix):
+		self.suffix = suffix
+
+	def __call__(self, read):
+		read = read[:]
+		if read.name.endswith(self.suffix):
+			read.name = read.name[:-len(self.suffix)]
+		return read
+
+
+class PrefixSuffixAdder(object):
+	"""
+	Add a suffix and a prefix to read names
+	"""
+	def __init__(self, prefix, suffix):
+		self.prefix = prefix
+		self.suffix = suffix
+
+	def __call__(self, read):
+		read = read[:]
+		adapter_name = 'no_adapter' if read.match is None else read.match.adapter.name
+		read.name = self.prefix.replace('{name}', adapter_name) + read.name + \
+			self.suffix.replace('{name}', adapter_name)
+		return read
+
+
+class DoubleEncoder(object):
+	"""
+	Double-encode colorspace reads, using characters ACGTN to represent colors.
+	"""
+	def __init__(self):
+		self.double_encode_trans = maketrans('0123.', 'ACGTN')
+
+	def __call__(self, read):
+		read = read[:]
+		read.sequence = read.sequence.translate(self.double_encode_trans)
+		return read
+
+
+class ZeroCapper(object):
+	"""
+	Change negative quality values of a read to zero
+	"""
+	def __init__(self, quality_base=33):
+		qb = quality_base
+		self.zero_cap_trans = maketrans(''.join(map(chr, range(qb))), chr(qb) * qb)
+
+	def __call__(self, read):
+		read = read[:]
+		read.qualities = read.qualities.translate(self.zero_cap_trans)
+		return read
+
+
+def PrimerTrimmer(read):
+	"""Trim primer base from colorspace reads"""
+	read = read[1:]
+	read.primer = ''
+	return read
+
+
+class NextseqQualityTrimmer(object):
+	def __init__(self, cutoff, base):
+		self.cutoff = cutoff
+		self.base = base
+		self.trimmed_bases = 0
+
+	def __call__(self, read):
+		stop = nextseq_trim_index(read, self.cutoff, self.base)
+		self.trimmed_bases += len(read) - stop
+		return read[:stop]
+
+
+class QualityTrimmer(object):
+	def __init__(self, cutoff_front, cutoff_back, base):
+		self.cutoff_front = cutoff_front
+		self.cutoff_back = cutoff_back
+		self.base = base
+		self.trimmed_bases = 0
+
+	def __call__(self, read):
+		start, stop = quality_trim_index(read.qualities, self.cutoff_front, self.cutoff_back, self.base)
+		self.trimmed_bases += len(read) - (stop - start)
+		return read[start:stop]
+
+
+class NEndTrimmer(object):
+	"""Trims Ns from the 3' and 5' end of reads"""
+	def __init__(self):
+		self.start_trim = re.compile(r'^N+')
+		self.end_trim = re.compile(r'N+$')
+
+	def __call__(self, read):
+		sequence = read.sequence
+		start_cut = self.start_trim.match(sequence)
+		end_cut = self.end_trim.search(sequence)
+		start_cut = start_cut.end() if start_cut else 0
+		end_cut = end_cut.start() if end_cut else len(read)
+		return read[start_cut:end_cut]
diff --git a/cutadapt/qualtrim.py b/cutadapt/qualtrim.py
new file mode 100644
index 0000000..ea79132
--- /dev/null
+++ b/cutadapt/qualtrim.py
@@ -0,0 +1,70 @@
+# coding: utf-8
+"""
+Quality trimming.
+"""
+from __future__ import print_function, division, absolute_import
+
+import sys
+
+if sys.version > '3':
+	xrange = range
+
+
+def quality_trim_index(qualities, cutoff, base=33):
+	"""
+	Find the position at which to trim a low-quality end from a nucleotide sequence.
+
+	Qualities are assumed to be ASCII-encoded as chr(qual + base).
+
+	The algorithm is the same as the one used by BWA within the function
+	'bwa_trim_read':
+	- Subtract the cutoff value from all qualities.
+	- Compute partial sums from all indices to the end of the sequence.
+	- Trim sequence at the index at which the sum is minimal.
+	"""
+	s = 0
+	max_qual = 0
+	max_i = len(qualities)
+	for i in reversed(xrange(max_i)):
+		q = ord(qualities[i]) - base
+		s += cutoff - q
+		if s < 0:
+			break
+		if s > max_qual:
+			max_qual = s
+			max_i = i
+	return max_i
+
+
+def nextseq_trim_index(sequence, cutoff, base=33):
+	"""
+	Variant of the above quality trimming routine that works on NextSeq data.
+	With Illumina NextSeq, bases are encoded with two colors. 'No color' (a
+	dark cycle) usually means that a 'G' was sequenced, but that also occurs
+	when sequencing falls off the end of the fragment. The read then contains
+	a run of high-quality G bases in the end.
+
+	This routine works as the one above, but counts qualities belonging to 'G'
+	bases as being equal to cutoff - 1.
+	"""
+	bases = sequence.sequence
+	qualities = sequence.qualities
+	s = 0
+	max_qual = 0
+	max_i = len(qualities)
+	for i in reversed(xrange(max_i)):
+		q = ord(qualities[i]) - base
+		if bases[i] == 'G':
+			q = cutoff - 1
+		s += cutoff - q
+		if s < 0:
+			break
+		if s > max_qual:
+			max_qual = s
+			max_i = i
+	return max_i
+
+try:
+	from cutadapt._qualtrim import quality_trim_index, nextseq_trim_index
+except:
+	pass
diff --git a/cutadapt/report.py b/cutadapt/report.py
new file mode 100644
index 0000000..35b3641
--- /dev/null
+++ b/cutadapt/report.py
@@ -0,0 +1,296 @@
+# coding: utf-8
+"""
+Routines for printing a report.
+"""
+from __future__ import print_function, division, absolute_import
+
+import sys
+from collections import namedtuple
+from contextlib import contextmanager
+import textwrap
+from .adapters import BACK, FRONT, PREFIX, SUFFIX, ANYWHERE, LINKED
+from .modifiers import QualityTrimmer, AdapterCutter
+from .filters import (NoFilter, PairedNoFilter, TooShortReadFilter, TooLongReadFilter,
+	DiscardTrimmedFilter, DiscardUntrimmedFilter, Demultiplexer, NContentFilter)
+
+
+class Statistics:
+	def __init__(self, n, total_bp1, total_bp2):
+		"""
+		n -- total number of reads
+		total_bp1 -- number of bases in first reads
+		total_bp2 -- number of bases in second reads (set to None for single-end data)
+		"""
+		self.n = n
+		self.total_bp = total_bp1
+		self.total_bp1 = total_bp1
+		if total_bp2 is None:
+			self.paired = False
+		else:
+			self.paired = True
+			self.total_bp2 = total_bp2
+			self.total_bp += total_bp2
+
+	def collect(self, adapters_pair, time, modifiers, modifiers2, writers):
+		self.time = max(time, 0.01)
+		self.too_short = None
+		self.too_long = None
+		self.written = 0
+		self.written_bp = [0, 0]
+		self.too_many_n = None
+		# Collect statistics from writers/filters
+		for w in writers:
+			if isinstance(w, (NoFilter, PairedNoFilter, Demultiplexer)) or isinstance(w.filter, (DiscardTrimmedFilter, DiscardUntrimmedFilter)):
+				self.written += w.written
+				if self.n > 0:
+					self.written_fraction = self.written / self.n
+				self.written_bp = self.written_bp[0] + w.written_bp[0], self.written_bp[1] + w.written_bp[1]
+			elif isinstance(w.filter, TooShortReadFilter):
+				self.too_short = w.filtered
+			elif isinstance(w.filter, TooLongReadFilter):
+				self.too_long = w.filtered
+			elif isinstance(w.filter, NContentFilter):
+				self.too_many_n = w.filtered
+		assert self.written is not None
+
+		# Collect statistics from modifiers
+		self.with_adapters = [0, 0]
+		self.quality_trimmed_bp = [0, 0]
+		self.did_quality_trimming = False
+		for i, modifiers_list in [(0, modifiers), (1, modifiers2)]:
+			for modifier in modifiers_list:
+				if isinstance(modifier, QualityTrimmer):
+					self.quality_trimmed_bp[i] = modifier.trimmed_bases
+					self.did_quality_trimming = True
+				elif isinstance(modifier, AdapterCutter):
+					self.with_adapters[i] += modifier.with_adapters
+		self.with_adapters_fraction = [ (v / self.n if self.n > 0 else 0) for v in self.with_adapters ]
+		self.quality_trimmed = sum(self.quality_trimmed_bp)
+		self.quality_trimmed_fraction = self.quality_trimmed / self.total_bp if self.total_bp > 0 else 0.0
+
+		self.total_written_bp = sum(self.written_bp)
+		self.total_written_bp_fraction = self.total_written_bp / self.total_bp if self.total_bp > 0 else 0.0
+
+		if self.n > 0:
+			if self.too_short is not None:
+				self.too_short_fraction = self.too_short / self.n
+			if self.too_long is not None:
+				self.too_long_fraction = self.too_long / self.n
+			if self.too_many_n is not None:
+				self.too_many_n_fraction = self.too_many_n / self.n
+
+
+ADAPTER_TYPES = {
+	BACK: "regular 3'",
+	FRONT: "regular 5'",
+	PREFIX: "anchored 5'",
+	SUFFIX: "anchored 3'",
+	ANYWHERE: "variable 5'/3'",
+	LINKED: "linked",
+}
+
+
+def print_error_ranges(adapter_length, error_rate):
+	print("No. of allowed errors:")
+	prev = 0
+	for errors in range(1, int(error_rate * adapter_length) + 1):
+		r = int(errors / error_rate)
+		print("{0}-{1} bp: {2};".format(prev, r - 1, errors - 1), end=' ')
+		prev = r
+	if prev == adapter_length:
+		print("{0} bp: {1}".format(adapter_length, int(error_rate * adapter_length)))
+	else:
+		print("{0}-{1} bp: {2}".format(prev, adapter_length, int(error_rate * adapter_length)))
+	print()
+
+
+def print_histogram(d, adapter_length, n, error_rate, errors):
+	"""
+	Print a histogram. Also, print the no. of reads expected to be
+	trimmed by chance (assuming a uniform distribution of nucleotides in the reads).
+	d -- a dictionary mapping lengths of trimmed sequences to their respective frequency
+	adapter_length -- adapter length
+	n -- total no. of reads.
+	"""
+	h = []
+	for length in sorted(d):
+		# when length surpasses adapter_length, the
+		# probability does not increase anymore
+		estimated = n * 0.25 ** min(length, adapter_length)
+		h.append( (length, d[length], estimated) )
+
+	print("length", "count", "expect", "max.err", "error counts", sep="\t")
+	for length, count, estimate in h:
+		max_errors = max(errors[length].keys())
+		errs = ' '.join(str(errors[length][e]) for e in range(max_errors+1))
+		print(length, count, "{0:.1F}".format(estimate), int(error_rate*min(length, adapter_length)), errs, sep="\t")
+	print()
+
+
+def print_adjacent_bases(bases, sequence):
+	"""
+	Print a summary of the bases preceding removed adapter sequences.
+	Print a warning if one of the bases is overrepresented and there are
+	at least 20 preceding bases available.
+
+	Return whether a warning was printed.
+	"""
+	total = sum(bases.values())
+	if total == 0:
+		return False
+	print('Bases preceding removed adapters:')
+	warnbase = None
+	for base in ['A', 'C', 'G', 'T', '']:
+		b = base if base != '' else 'none/other'
+		fraction = 1.0 * bases[base] / total
+		print('  {0}: {1:.1%}'.format(b, fraction))
+		if fraction > 0.8 and base != '':
+			warnbase = b
+	if total >= 20 and warnbase is not None:
+		print('WARNING:')
+		print('    The adapter is preceded by "{0}" extremely often.'.format(warnbase))
+		print('    The provided adapter sequence may be incomplete.')
+		print('    To fix the problem, add "{0}" to the beginning of the adapter sequence.'.format(warnbase))
+		print()
+		return True
+	print()
+	return False
+
+
+@contextmanager
+def redirect_standard_output(file):
+	if file is None:
+		yield
+		return
+	old_stdout = sys.stdout
+	sys.stdout = file
+	yield
+	sys.stdout = old_stdout
+
+
+def print_report(stats, adapters_pair):
+	"""Print report to standard output."""
+	if stats.n == 0:
+		print("No reads processed! Either your input file is empty or you used the wrong -f/--format parameter.")
+		return
+	print("Finished in {0:.2F} s ({1:.0F} us/read; {2:.2F} M reads/minute).".format(
+		stats.time, 1E6 * stats.time / stats.n, stats.n / stats.time * 60 / 1E6))
+
+	report = "\n=== Summary ===\n\n"
+	if stats.paired:
+		report += textwrap.dedent("""\
+		Total read pairs processed:      {n:13,d}
+		  Read 1 with adapter:           {with_adapters[0]:13,d} ({with_adapters_fraction[0]:.1%})
+		  Read 2 with adapter:           {with_adapters[1]:13,d} ({with_adapters_fraction[1]:.1%})
+		""")
+	else:
+		report += textwrap.dedent("""\
+		Total reads processed:           {n:13,d}
+		Reads with adapters:             {with_adapters[0]:13,d} ({with_adapters_fraction[0]:.1%})
+		""")
+	if stats.too_short is not None:
+		report += "{pairs_or_reads} that were too short:       {too_short:13,d} ({too_short_fraction:.1%})\n"
+	if stats.too_long is not None:
+		report += "{pairs_or_reads} that were too long:        {too_long:13,d} ({too_long_fraction:.1%})\n"
+	if stats.too_many_n is not None:
+		report += "{pairs_or_reads} with too many N:           {too_many_n:13,d} ({too_many_n_fraction:.1%})\n"
+
+	report += textwrap.dedent("""\
+	{pairs_or_reads} written (passing filters): {written:13,d} ({written_fraction:.1%})
+
+	Total basepairs processed: {total_bp:13,d} bp
+	""")
+	if stats.paired:
+		report += "  Read 1: {total_bp1:13,d} bp\n"
+		report += "  Read 2: {total_bp2:13,d} bp\n"
+
+	if stats.did_quality_trimming:
+		report += "Quality-trimmed:           {quality_trimmed:13,d} bp ({quality_trimmed_fraction:.1%})\n"
+		if stats.paired:
+			report += "  Read 1: {quality_trimmed_bp[0]:13,d} bp\n"
+			report += "  Read 2: {quality_trimmed_bp[1]:13,d} bp\n"
+
+	report += "Total written (filtered):  {total_written_bp:13,d} bp ({total_written_bp_fraction:.1%})\n"
+	if stats.paired:
+		report += "  Read 1: {written_bp[0]:13,d} bp\n"
+		report += "  Read 2: {written_bp[1]:13,d} bp\n"
+	v = vars(stats)
+	v['pairs_or_reads'] = "Pairs" if stats.paired else "Reads"
+	try:
+		report = report.format(**v)
+	except ValueError:
+		# Python 2.6 does not support the comma format specifier (PEP 378)
+		report = report.replace(",d}", "d}").format(**v)
+	print(report)
+
+	warning = False
+	for which_in_pair in (0, 1):
+		for adapter in adapters_pair[which_in_pair]:
+			total_front = sum(adapter.lengths_front.values())
+			total_back = sum(adapter.lengths_back.values())
+			total = total_front + total_back
+			where = adapter.where
+			assert where in (ANYWHERE, LINKED) or (where in (BACK, SUFFIX) and total_front == 0) or (where in (FRONT, PREFIX) and total_back == 0)
+
+			if stats.paired:
+				extra = 'First read: ' if which_in_pair == 0 else 'Second read: '
+			else:
+				extra = ''
+
+			print("=" * 3, extra + "Adapter", adapter.name, "=" * 3)
+			print()
+			if where == LINKED:
+				print("Sequence: {0}...{1}; Type: linked; Length: {2}+{3}; Trimmed: {4} times; Half matches: {5}".
+					format(adapter.front_adapter.sequence,
+						adapter.back_adapter.sequence,
+						len(adapter.front_adapter.sequence),
+						len(adapter.back_adapter.sequence),
+						total_front, total_back))
+			else:
+				print("Sequence: {0}; Type: {1}; Length: {2}; Trimmed: {3} times.".
+					format(adapter.sequence, ADAPTER_TYPES[adapter.where],
+						len(adapter.sequence), total))
+			if total == 0:
+				print()
+				continue
+			if where == ANYWHERE:
+				print(total_front, "times, it overlapped the 5' end of a read")
+				print(total_back, "times, it overlapped the 3' end or was within the read")
+				print()
+				print_error_ranges(len(adapter), adapter.max_error_rate)
+				print("Overview of removed sequences (5')")
+				print_histogram(adapter.lengths_front, len(adapter), stats.n, adapter.max_error_rate, adapter.errors_front)
+				print()
+				print("Overview of removed sequences (3' or within)")
+				print_histogram(adapter.lengths_back, len(adapter), stats.n, adapter.max_error_rate, adapter.errors_back)
+			elif where == LINKED:
+				print()
+				print_error_ranges(len(adapter.front_adapter), adapter.front_adapter.max_error_rate)
+				print_error_ranges(len(adapter.back_adapter), adapter.back_adapter.max_error_rate)
+				print("Overview of removed sequences at 5' end")
+				print_histogram(adapter.front_adapter.lengths_front,
+					len(adapter.front_adapter), stats.n,
+					adapter.front_adapter.max_error_rate,
+					adapter.front_adapter.errors_front)
+				print()
+				print("Overview of removed sequences at 3' end")
+				print_histogram(adapter.back_adapter.lengths_back,
+					len(adapter.back_adapter), stats.n,
+					adapter.back_adapter.max_error_rate, adapter.back_adapter.errors_back)
+			elif where in (FRONT, PREFIX):
+				print()
+				print_error_ranges(len(adapter), adapter.max_error_rate)
+				print("Overview of removed sequences")
+				print_histogram(adapter.lengths_front, len(adapter), stats.n, adapter.max_error_rate, adapter.errors_front)
+			else:
+				assert where in (BACK, SUFFIX)
+				print()
+				print_error_ranges(len(adapter), adapter.max_error_rate)
+				warning = warning or print_adjacent_bases(adapter.adjacent_bases, adapter.sequence)
+				print("Overview of removed sequences")
+				print_histogram(adapter.lengths_back, len(adapter), stats.n, adapter.max_error_rate, adapter.errors_back)
+
+	if warning:
+		print('WARNING:')
+		print('    One or more of your adapter sequences may be incomplete.')
+		print('    Please see the detailed output above.')
diff --git a/cutadapt/scripts/__init__.py b/cutadapt/scripts/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/cutadapt/scripts/cutadapt.py b/cutadapt/scripts/cutadapt.py
new file mode 100755
index 0000000..7a7b0af
--- /dev/null
+++ b/cutadapt/scripts/cutadapt.py
@@ -0,0 +1,726 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# kate: word-wrap off; remove-trailing-spaces all;
+#
+# Copyright (c) 2010-2016 Marcel Martin <marcel.martin at scilifelab.se>
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+
+"""
+cutadapt version %version
+Copyright (C) 2010-2016 Marcel Martin <marcel.martin at scilifelab.se>
+
+cutadapt removes adapter sequences from high-throughput sequencing reads.
+
+Usage:
+    cutadapt -a ADAPTER [options] [-o output.fastq] input.fastq
+
+For paired-end reads:
+    cutadapt -a ADAPT1 -A ADAPT2 [options] -o out1.fastq -p out2.fastq in1.fastq in2.fastq
+
+Replace "ADAPTER" with the actual sequence of your 3' adapter. IUPAC wildcard
+characters are supported. The reverse complement is *not* automatically
+searched. All reads from input.fastq will be written to output.fastq with the
+adapter sequence removed. Adapter matching is error-tolerant. Multiple adapter
+sequences can be given (use further -a options), but only the best-matching
+adapter will be removed.
+
+Input may also be in FASTA format. Compressed input and output is supported and
+auto-detected from the file name (.gz, .xz, .bz2). Use the file name '-' for
+standard input/output. Without the -o option, output is sent to standard output.
+
+Citation:
+
+Marcel Martin. Cutadapt removes adapter sequences from high-throughput
+sequencing reads. EMBnet.Journal, 17(1):10-12, May 2011.
+http://dx.doi.org/10.14806/ej.17.1.200
+
+Use "cutadapt --help" to see all command-line options.
+See http://cutadapt.readthedocs.org/ for full documentation.
+"""
+
+from __future__ import print_function, division, absolute_import
+
+# Print a helpful error message if the extension modules cannot be imported.
+from cutadapt import check_importability
+check_importability()
+
+import sys
+import time
+import errno
+from optparse import OptionParser, OptionGroup, SUPPRESS_HELP
+import functools
+import logging
+import platform
+import textwrap
+
+from cutadapt import seqio, __version__
+from cutadapt.xopen import xopen
+from cutadapt.adapters import AdapterParser
+from cutadapt.modifiers import (LengthTagModifier, SuffixRemover, PrefixSuffixAdder,
+	DoubleEncoder, ZeroCapper, PrimerTrimmer, QualityTrimmer, UnconditionalCutter,
+	NEndTrimmer, AdapterCutter, NextseqQualityTrimmer)
+from cutadapt.filters import (NoFilter, PairedNoFilter, Redirector, PairedRedirector,
+	LegacyPairedRedirector, TooShortReadFilter, TooLongReadFilter,
+	Demultiplexer, NContentFilter, DiscardUntrimmedFilter, DiscardTrimmedFilter)
+from cutadapt.report import Statistics, print_report, redirect_standard_output
+from cutadapt.compat import next
+
+logger = logging.getLogger()
+
+class CutadaptOptionParser(OptionParser):
+	def get_usage(self):
+		return self.usage.lstrip().replace('%version', __version__)
+
+
+class RestFileWriter(object):
+	def __init__(self, file):
+		self.file = file
+
+	def write(self, match):
+		rest = match.rest()
+		if len(rest) > 0:
+			print(rest, match.read.name, file=self.file)
+
+
+def process_single_reads(reader, modifiers, filters):
+	"""
+	Loop over reads, find adapters, trim reads, apply modifiers and
+	output modified reads.
+
+	Return a Statistics object.
+	"""
+	n = 0  # no. of processed reads
+	total_bp = 0
+	for read in reader:
+		n += 1
+		total_bp += len(read.sequence)
+		for modifier in modifiers:
+			read = modifier(read)
+		for filter in filters:
+			if filter(read):
+				break
+
+	return Statistics(n=n, total_bp1=total_bp, total_bp2=None)
+
+
+def process_paired_reads(paired_reader, modifiers1, modifiers2, filters):
+	"""
+	Loop over reads, find adapters, trim reads, apply modifiers and
+	output modified reads.
+
+	Return a Statistics object.
+	"""
+	n = 0  # no. of processed reads
+	total1_bp = 0
+	total2_bp = 0
+	for read1, read2 in paired_reader:
+		n += 1
+		total1_bp += len(read1.sequence)
+		total2_bp += len(read2.sequence)
+		for modifier in modifiers1:
+			read1 = modifier(read1)
+		for modifier in modifiers2:
+			read2 = modifier(read2)
+		for filter in filters:
+			# Stop writing as soon as one of the filters was successful.
+			if filter(read1, read2):
+				break
+	return Statistics(n=n, total_bp1=total1_bp, total_bp2=total2_bp)
+
+
+def setup_logging(stdout=False, quiet=False):
+	"""
+	Attach handler to the global logger object
+	"""
+	# Due to backwards compatibility, logging output is sent to standard output
+	# instead of standard error if the -o option is used.
+	stream_handler = logging.StreamHandler(sys.stdout if stdout else sys.stderr)
+	stream_handler.setFormatter(logging.Formatter('%(message)s'))
+	stream_handler.setLevel(logging.ERROR if quiet else logging.INFO)
+	logger.setLevel(logging.INFO)
+	logger.addHandler(stream_handler)
+
+
+def get_option_parser():
+	parser = CutadaptOptionParser(usage=__doc__, version=__version__)
+
+	parser.add_option("--debug", action='store_true', default=False,
+		help="Print debugging information.")
+	parser.add_option("-f", "--format",
+		help="Input file format; can be either 'fasta', 'fastq' or 'sra-fastq'. "
+			"Ignored when reading csfasta/qual files. Default: auto-detect "
+			"from file name extension.")
+
+	group = OptionGroup(parser, "Finding adapters:",
+		description="Parameters -a, -g, -b specify adapters to be removed from "
+			"each read (or from the first read in a pair if data is paired). "
+			"If specified multiple times, only the best matching adapter is "
+			"trimmed (but see the --times option). When the special notation "
+			"'file:FILE' is used, adapter sequences are read from the given "
+			"FASTA file.")
+	group.add_option("-a", "--adapter", action="append", default=[], metavar="ADAPTER",
+		dest="adapters",
+		help="Sequence of an adapter ligated to the 3' end (paired data: of the "
+			"first read). The adapter and subsequent bases are trimmed. If a "
+			"'$' character is appended ('anchoring'), the adapter is only "
+			"found if it is a suffix of the read.")
+	group.add_option("-g", "--front", action="append", default=[], metavar="ADAPTER",
+		help="Sequence of an adapter ligated to the 5' end (paired data: of the "
+			"first read). The adapter and any preceding bases are trimmed. "
+			"Partial matches at the 5' end are allowed. If a '^' character is "
+			"prepended ('anchoring'), the adapter is only found if it is a "
+			"prefix of the read.")
+	group.add_option("-b", "--anywhere", action="append", default=[], metavar="ADAPTER",
+		help="Sequence of an adapter that may be ligated to the 5' or 3' end "
+			"(paired data: of the first read). Both types of matches as "
+			"described under -a und -g are allowed. If the first base of the "
+			"read is part of the match, the behavior is as with -g, otherwise "
+			"as with -a. This option is mostly for rescuing failed library "
+			"preparations - do not use if you know which end your adapter was "
+			"ligated to!")
+	group.add_option("-e", "--error-rate", type=float, default=0.1,
+		help="Maximum allowed error rate (no. of errors divided by the length "
+			"of the matching region). Default: %default")
+	group.add_option("--no-indels", action='store_false', dest='indels', default=True,
+		help="Allow only mismatches in alignments. "
+			"Default: allow both mismatches and indels")
+	group.add_option("-n", "--times", type=int, metavar="COUNT", default=1,
+		help="Remove up to COUNT adapters from each read. Default: %default")
+	group.add_option("-O", "--overlap", type=int, metavar="MINLENGTH", default=3,
+		help="If the overlap between the read and the adapter is shorter than "
+			"MINLENGTH, the read is not modified. Reduces the no. of bases "
+			"trimmed due to random adapter matches. Default: %default")
+	group.add_option("--match-read-wildcards", action="store_true", default=False,
+		help="Interpret IUPAC wildcards in reads. Default: %default")
+	group.add_option("-N", "--no-match-adapter-wildcards", action="store_false",
+		default=True, dest='match_adapter_wildcards',
+		help="Do not interpret IUPAC wildcards in adapters.")
+	group.add_option("--no-trim", dest='action', action='store_const', const=None,
+		help="Match and redirect reads to output/untrimmed-output as usual, "
+			"but do not remove adapters.")
+	group.add_option("--mask-adapter", dest='action', action='store_const', const='mask',
+		help="Mask adapters with 'N' characters instead of trimming them.")
+	parser.add_option_group(group)
+
+	group = OptionGroup(parser, "Additional read modifications")
+	group.add_option("-u", "--cut", action='append', default=[], type=int, metavar="LENGTH",
+		help="Remove bases from each read (first read only if paired). "
+			"If LENGTH is positive, remove bases from the beginning. "
+			"If LENGTH is negative, remove bases from the end. "
+			"Can be used twice if LENGTHs have different signs.")
+	group.add_option("-q", "--quality-cutoff", default=None, metavar="[5'CUTOFF,]3'CUTOFF",
+		help="Trim low-quality bases from 5' and/or 3' ends of each read before "
+			"adapter removal. Applied to both reads if data is paired. If one "
+			"value is given, only the 3' end is trimmed. If two "
+			"comma-separated cutoffs are given, the 5' end is trimmed with "
+			"the first cutoff, the 3' end with the second.")
+	group.add_option("--nextseq-trim", type=int, default=None, metavar="3'CUTOFF",
+		help="NextSeq-specific quality trimming (each read). Trims also dark "
+			"cycles appearing as high-quality G bases (EXPERIMENTAL).")
+	group.add_option("--quality-base", type=int, default=33,
+		help="Assume that quality values in FASTQ are encoded as ascii(quality "
+			"+ QUALITY_BASE). This needs to be set to 64 for some old Illumina "
+			"FASTQ files. Default: %default")
+	group.add_option("--trim-n", action='store_true', default=False,
+		help="Trim N's on ends of reads.")
+	group.add_option("-x", "--prefix", default='',
+		help="Add this prefix to read names. Use {name} to insert the name of the matching adapter.")
+	group.add_option("-y", "--suffix", default='',
+		help="Add this suffix to read names; can also include {name}")
+	group.add_option("--strip-suffix", action='append', default=[],
+		help="Remove this suffix from read names if present. Can be given multiple times.")
+	group.add_option("--length-tag", metavar="TAG",
+		help="Search for TAG followed by a decimal number in the description "
+			"field of the read. Replace the decimal number with the correct "
+			"length of the trimmed read. For example, use --length-tag 'length=' "
+			"to correct fields like 'length=123'.")
+	parser.add_option_group(group)
+
+	group = OptionGroup(parser, "Filtering of processed reads")
+	group.add_option("--discard-trimmed", "--discard", action='store_true', default=False,
+		help="Discard reads that contain an adapter. Also use -O to avoid "
+			"discarding too many randomly matching reads!")
+	group.add_option("--discard-untrimmed", "--trimmed-only", action='store_true', default=False,
+		help="Discard reads that do not contain the adapter.")
+	group.add_option("-m", "--minimum-length", type=int, default=0, metavar="LENGTH",
+		help="Discard trimmed reads that are shorter than LENGTH. Reads that "
+			"are too short even before adapter removal are also discarded. In "
+			"colorspace, an initial primer is not counted. Default: 0")
+	group.add_option("-M", "--maximum-length", type=int, default=sys.maxsize, metavar="LENGTH",
+		help="Discard trimmed reads that are longer than LENGTH. "
+			"Reads that are too long even before adapter removal "
+			"are also discarded. In colorspace, an initial primer "
+			"is not counted. Default: no limit")
+	group.add_option("--max-n", type=float, default=-1.0, metavar="COUNT",
+		help="Discard reads with too many N bases. If COUNT is an integer, it "
+			"is treated as the absolute number of N bases. If it is between 0 "
+			"and 1, it is treated as the proportion of N's allowed in a read.")
+	parser.add_option_group(group)
+
+	group = OptionGroup(parser, "Output")
+	group.add_option("--quiet", default=False, action='store_true',
+		help="Print only error messages.")
+	group.add_option("-o", "--output", metavar="FILE",
+		help="Write trimmed reads to FILE. FASTQ or FASTA format is chosen "
+			"depending on input. The summary report is sent to standard output. "
+			"Use '{name}' in FILE to demultiplex reads into multiple "
+			"files. Default: write to standard output")
+	group.add_option("--info-file", metavar="FILE",
+		help="Write information about each read and its adapter matches into FILE. "
+			"See the documentation for the file format.")
+	group.add_option("-r", "--rest-file", metavar="FILE",
+		help="When the adapter matches in the middle of a read, write the "
+			"rest (after the adapter) into FILE.")
+	group.add_option("--wildcard-file", metavar="FILE",
+		help="When the adapter has N bases (wildcards), write adapter bases "
+			"matching wildcard positions to FILE. When there are indels in the "
+			"alignment, this will often not be accurate.")
+	group.add_option("--too-short-output", metavar="FILE",
+		help="Write reads that are too short (according to length specified by "
+		"-m) to FILE. Default: discard reads")
+	group.add_option("--too-long-output", metavar="FILE",
+		help="Write reads that are too long (according to length specified by "
+		"-M) to FILE. Default: discard reads")
+	group.add_option("--untrimmed-output", default=None, metavar="FILE",
+		help="Write reads that do not contain the adapter to FILE. Default: "
+			"output to same file as trimmed reads")
+	parser.add_option_group(group)
+
+	group = OptionGroup(parser, "Colorspace options")
+	group.add_option("-c", "--colorspace", action='store_true', default=False,
+		help="Enable colorspace mode: Also trim the color that is adjacent to the found adapter.")
+	group.add_option("-d", "--double-encode", action='store_true', default=False,
+		help="Double-encode colors (map 0,1,2,3,4 to A,C,G,T,N).")
+	group.add_option("-t", "--trim-primer", action='store_true', default=False,
+		help="Trim primer base and the first color (which is the transition "
+			"to the first nucleotide)")
+	group.add_option("--strip-f3", action='store_true', default=False,
+		help="Strip the _F3 suffix of read names")
+	group.add_option("--maq", "--bwa", action='store_true', default=False,
+		help="MAQ- and BWA-compatible colorspace output. This enables -c, -d, "
+			"-t, --strip-f3 and -y '/1'.")
+	group.add_option("--no-zero-cap", dest='zero_cap', action='store_false',
+		help="Do not change negative quality values to zero in colorspace "
+			"data. By default, they are since many tools have problems with "
+			"negative qualities.")
+	group.add_option("--zero-cap", "-z", action='store_true',
+		help="Change negative quality values to zero. This is enabled "
+		"by default when -c/--colorspace is also enabled. Use the above option "
+		"to disable it.")
+	parser.set_defaults(zero_cap=None, action='trim')
+	parser.add_option_group(group)
+
+	group = OptionGroup(parser, "Paired-end options", description="The "
+		"-A/-G/-B/-U options work like their -a/-b/-g/-u counterparts, but "
+		"are applied to the second read in each pair.")
+	group.add_option("-A", dest='adapters2', action='append', default=[], metavar='ADAPTER',
+		help="3' adapter to be removed from second read in a pair.")
+	group.add_option("-G", dest='front2', action='append', default=[], metavar='ADAPTER',
+		help="5' adapter to be removed from second read in a pair.")
+	group.add_option("-B", dest='anywhere2', action='append', default=[], metavar='ADAPTER',
+		help="5'/3 adapter to be removed from second read in a pair.")
+	group.add_option("-U", dest='cut2', action='append', default=[], type=int, metavar="LENGTH",
+		help="Remove LENGTH bases from second read in a pair (see --cut).")
+	group.add_option("-p", "--paired-output", metavar="FILE",
+		help="Write second read in a pair to FILE.")
+	# Setting the default for pair_filter to None allows us to find out whether
+	# the option was used at all.
+	group.add_option("--pair-filter", metavar='(any|both)', default=None,
+		choices=("any", "both"),
+		help="Which of the reads in a paired-end read have to match the "
+			"filtering criterion in order for it to be filtered. "
+			"Default: any")
+	group.add_option("--interleaved", action='store_true', default=False,
+		help="Read and write interleaved paired-end reads.")
+	group.add_option("--untrimmed-paired-output", metavar="FILE",
+		help="Write second read in a pair to this FILE when no adapter "
+			"was found in the first read. Use this option together with "
+			"--untrimmed-output when trimming paired-end reads. Default: output "
+			"to same file as trimmed reads")
+	group.add_option("--too-short-paired-output", metavar="FILE", default=None,
+		help="Write second read in a pair to this file if pair is too short. "
+			"Use together with --too-short-output.")
+	group.add_option("--too-long-paired-output", metavar="FILE", default=None,
+		help="Write second read in a pair to this file if pair is too long. "
+			"Use together with --too-long-output.")
+	parser.add_option_group(group)
+
+	return parser
+
+
+def main(cmdlineargs=None, default_outfile=sys.stdout):
+	"""
+	Parse and validate the command line, set up the modifier and filter
+	pipelines, process all reads and (unless --quiet) print the report.
+
+	cmdlineargs -- list of arguments; sys.argv[1:] is used if it is None
+	default_outfile -- where trimmed reads are sent if ``-o`` is not used
+	"""
+	parser = get_option_parser()
+	if cmdlineargs is None:
+		cmdlineargs = sys.argv[1:]
+	options, args = parser.parse_args(args=cmdlineargs)
+	# Setup logging only if there are not already any handlers (can happen when
+	# this function is being called externally such as from unit tests)
+	if not logging.root.handlers:
+		setup_logging(stdout=bool(options.output), quiet=options.quiet)
+
+	if len(args) == 0:
+		parser.error("At least one parameter needed: name of a FASTA or FASTQ file.")
+	elif len(args) > 2:
+		parser.error("Too many parameters.")
+	input_filename = args[0]
+	if input_filename.endswith('.qual'):
+		parser.error("If a .qual file is given, it must be the second argument.")
+
+	# Find out which 'mode' we need to use.
+	# Default: single-read trimming (neither -p nor -A/-G/-B/-U/--interleaved given)
+	paired = False
+	if options.paired_output:
+		# Modify first read only, keep second in sync (-p given, but not -A/-G/-B/-U).
+		# This exists for backwards compatibility ('legacy mode').
+		paired = 'first'
+	# Any of these options switch off legacy mode
+	if (options.adapters2 or options.front2 or options.anywhere2 or
+		options.cut2 or options.interleaved or options.pair_filter or
+		options.too_short_paired_output or options.too_long_paired_output):
+		# Full paired-end trimming when both -p and -A/-G/-B/-U given
+		# Read modifications (such as quality trimming) are applied also to second read.
+		paired = 'both'
+
+	if paired and len(args) == 1 and not options.interleaved:
+		parser.error("When paired-end trimming is enabled via -A/-G/-B/-U or -p, "
+			"two input files are required.")
+	if options.interleaved and len(args) != 1:
+		parser.error("When reading interleaved files, only one input file may "
+			"be given.")
+	if not paired and options.untrimmed_paired_output:
+		parser.error("Option --untrimmed-paired-output can only be used when "
+			"trimming paired-end reads (with option -p).")
+
+	# Assign input_paired_filename and quality_filename
+	input_paired_filename = None
+	quality_filename = None
+	if paired:
+		if not options.interleaved:
+			input_paired_filename = args[1]
+			if not options.paired_output:
+				parser.error("When paired-end trimming is enabled via -A/-G/-B/-U, "
+					"a second output file needs to be specified via -p (--paired-output).")
+			if not options.output:
+				parser.error("When you use -p or --paired-output, you must also "
+					"use the -o option.")
+			if bool(options.untrimmed_output) != bool(options.untrimmed_paired_output):
+				parser.error("When trimming paired-end reads, you must use either none "
+					"or both of the --untrimmed-output/--untrimmed-paired-output options.")
+			if options.too_short_output and not options.too_short_paired_output:
+				parser.error("When using --too-short-output with paired-end "
+					"reads, you also need to use --too-short-paired-output")
+			if options.too_long_output and not options.too_long_paired_output:
+				parser.error("When using --too-long-output with paired-end "
+					"reads, you also need to use --too-long-paired-output")
+	elif len(args) == 2:
+		quality_filename = args[1]
+		if options.format is not None:
+			parser.error("If a pair of .fasta and .qual files is given, the -f/--format parameter cannot be used.")
+
+	if options.format is not None and options.format.lower() not in ['fasta', 'fastq', 'sra-fastq']:
+		parser.error("The input file format must be either 'fasta', 'fastq' or "
+			"'sra-fastq' (not '{0}').".format(options.format))
+
+	# --maq/--bwa implies colorspace mode. Resolve it before the reader and the
+	# writers are opened below, since they are created with options.colorspace.
+	if options.maq:
+		options.colorspace = True
+		options.double_encode = True
+		options.trim_primer = True
+		options.strip_suffix.append('_F3')
+		options.suffix = "/1"
+
+	# Open input file(s)
+	try:
+		reader = seqio.open(input_filename, file2=input_paired_filename,
+				qualfile=quality_filename, colorspace=options.colorspace,
+				fileformat=options.format, interleaved=options.interleaved)
+	except (seqio.UnknownFileType, IOError) as e:
+		parser.error(e)
+
+	if options.quality_cutoff is not None:
+		cutoffs = options.quality_cutoff.split(',')
+		if len(cutoffs) == 1:
+			try:
+				cutoffs = [0, int(cutoffs[0])]
+			except ValueError as e:
+				parser.error("Quality cutoff value not recognized: {0}".format(e))
+		elif len(cutoffs) == 2:
+			try:
+				cutoffs = [int(cutoffs[0]), int(cutoffs[1])]
+			except ValueError as e:
+				parser.error("Quality cutoff value not recognized: {0}".format(e))
+		else:
+			parser.error("Expected one value or two values separated by comma for the quality cutoff")
+	else:
+		cutoffs = None
+
+	open_writer = functools.partial(seqio.open, mode='w',
+		qualities=reader.delivers_qualities, colorspace=options.colorspace,
+		interleaved=options.interleaved)
+
+	if options.pair_filter is None:
+		options.pair_filter = 'any'
+	min_affected = 2 if options.pair_filter == 'both' else 1
+	if not paired:
+		filter_wrapper = Redirector
+	elif paired == 'first':
+		filter_wrapper = LegacyPairedRedirector
+	elif paired == 'both':
+		filter_wrapper = functools.partial(PairedRedirector, min_affected=min_affected)
+	filters = []
+	# TODO open_files = []
+	too_short_writer = None  # too short reads go here
+	# TODO pass file name to TooShortReadFilter, add a .close() method?
+	if options.minimum_length > 0:
+		if options.too_short_output:
+			too_short_writer = open_writer(options.too_short_output, options.too_short_paired_output)
+		filters.append(filter_wrapper(too_short_writer, TooShortReadFilter(options.minimum_length)))
+	too_long_writer = None  # too long reads go here
+	if options.maximum_length < sys.maxsize:
+		if options.too_long_output is not None:
+			too_long_writer = open_writer(options.too_long_output, options.too_long_paired_output)
+		filters.append(filter_wrapper(too_long_writer, TooLongReadFilter(options.maximum_length)))
+
+	if options.max_n != -1:
+		filters.append(filter_wrapper(None, NContentFilter(options.max_n)))
+
+	if int(options.discard_trimmed) + int(options.discard_untrimmed) + int(options.untrimmed_output is not None) > 1:
+		parser.error("Only one of the --discard-trimmed, --discard-untrimmed "
+			"and --untrimmed-output options can be used at the same time.")
+	demultiplexer = None
+	untrimmed_writer = None
+	writer = None
+	if options.output is not None and '{name}' in options.output:
+		if options.discard_trimmed:
+			parser.error("Do not use --discard-trimmed when demultiplexing.")
+		if paired:
+			parser.error("Demultiplexing not supported for paired-end files, yet.")
+		untrimmed = options.output.replace('{name}', 'unknown')
+		if options.untrimmed_output:
+			untrimmed = options.untrimmed_output
+		if options.discard_untrimmed:
+			untrimmed = None
+		demultiplexer = Demultiplexer(options.output, untrimmed,
+			qualities=reader.delivers_qualities, colorspace=options.colorspace)
+		filters.append(demultiplexer)
+	else:
+		# Set up the remaining filters to deal with --discard-trimmed,
+		# --discard-untrimmed and --untrimmed-output. These options
+		# are mutually exclusive in order to avoid brain damage.
+		if options.discard_trimmed:
+			filters.append(filter_wrapper(None, DiscardTrimmedFilter()))
+		elif options.discard_untrimmed:
+			filters.append(filter_wrapper(None, DiscardUntrimmedFilter()))
+		elif options.untrimmed_output:
+			untrimmed_writer = open_writer(options.untrimmed_output,
+				options.untrimmed_paired_output)
+			filters.append(filter_wrapper(untrimmed_writer, DiscardUntrimmedFilter()))
+
+		# Finally, figure out where the reads that passed all the previous
+		# filters should go.
+		if options.output is not None:
+			writer = open_writer(options.output, options.paired_output)
+		else:
+			writer = open_writer(default_outfile)
+		if not paired:
+			filters.append(NoFilter(writer))
+		else:
+			filters.append(PairedNoFilter(writer))
+
+	# Unless --zero-cap/--no-zero-cap was given, zero-capping follows colorspace mode
+	if options.zero_cap is None:
+		options.zero_cap = options.colorspace
+	if options.trim_primer and not options.colorspace:
+		parser.error("Trimming the primer makes only sense in colorspace.")
+	if options.double_encode and not options.colorspace:
+		parser.error("Double-encoding makes only sense in colorspace.")
+	if options.anywhere and options.colorspace:
+		parser.error("Using --anywhere with colorspace reads is currently not supported (if you think this may be useful, contact the author).")
+	if not (0 <= options.error_rate <= 1.):
+		parser.error("The maximum error rate must be between 0 and 1.")
+	if options.overlap < 1:
+		parser.error("The overlap must be at least 1.")
+
+	if options.rest_file is not None:
+		options.rest_file = xopen(options.rest_file, 'w')
+		rest_writer = RestFileWriter(options.rest_file)
+	else:
+		rest_writer = None
+	if options.info_file is not None:
+		options.info_file = xopen(options.info_file, 'w')
+	if options.wildcard_file is not None:
+		options.wildcard_file = xopen(options.wildcard_file, 'w')
+
+	if options.colorspace:
+		if options.match_read_wildcards:
+			parser.error('IUPAC wildcards not supported in colorspace')
+		options.match_adapter_wildcards = False
+
+	adapter_parser = AdapterParser(
+		colorspace=options.colorspace,
+		max_error_rate=options.error_rate,
+		min_overlap=options.overlap,
+		read_wildcards=options.match_read_wildcards,
+		adapter_wildcards=options.match_adapter_wildcards,
+		indels=options.indels)
+
+	try:
+		adapters = adapter_parser.parse_multi(options.adapters, options.anywhere, options.front)
+		adapters2 = adapter_parser.parse_multi(options.adapters2, options.anywhere2, options.front2)
+	except IOError as e:
+		if e.errno == errno.ENOENT:
+			parser.error(e)
+		raise
+	except ValueError as e:
+		parser.error(e)
+	if options.debug:
+		for adapter in adapters + adapters2:
+			adapter.enable_debug()
+
+	if not adapters and not adapters2 and not cutoffs and \
+			options.nextseq_trim is None and \
+			options.cut == [] and options.cut2 == [] and \
+			options.minimum_length == 0 and \
+			options.maximum_length == sys.maxsize and \
+			quality_filename is None and \
+			options.max_n == -1 and not options.trim_n:
+		parser.error("You need to provide at least one adapter sequence.")
+
+	# Create the single-end processing pipeline (a list of "modifiers")
+	modifiers = []
+	if options.cut:
+		if len(options.cut) > 2:
+			parser.error("You cannot remove bases from more than two ends.")
+		if len(options.cut) == 2 and options.cut[0] * options.cut[1] > 0:
+			parser.error("You cannot remove bases from the same end twice.")
+		for cut in options.cut:
+			if cut != 0:
+				modifiers.append(UnconditionalCutter(cut))
+
+	if options.nextseq_trim is not None:
+		modifiers.append(NextseqQualityTrimmer(options.nextseq_trim, options.quality_base))
+
+	if cutoffs:
+		modifiers.append(QualityTrimmer(cutoffs[0], cutoffs[1], options.quality_base))
+	if adapters:
+		adapter_cutter = AdapterCutter(adapters, options.times,
+				options.wildcard_file, options.info_file, rest_writer, options.action)
+		modifiers.append(adapter_cutter)
+
+	# Modifiers that apply to both reads of paired-end reads unless in legacy mode
+	modifiers_both = []
+	if options.trim_n:
+		modifiers_both.append(NEndTrimmer())
+	if options.length_tag:
+		modifiers_both.append(LengthTagModifier(options.length_tag))
+	if options.strip_f3:
+		options.strip_suffix.append('_F3')
+	for suffix in options.strip_suffix:
+		modifiers_both.append(SuffixRemover(suffix))
+	if options.prefix or options.suffix:
+		modifiers_both.append(PrefixSuffixAdder(options.prefix, options.suffix))
+	if options.double_encode:
+		modifiers_both.append(DoubleEncoder())
+	if options.zero_cap and reader.delivers_qualities:
+		modifiers_both.append(ZeroCapper(quality_base=options.quality_base))
+	if options.trim_primer:
+		modifiers_both.append(PrimerTrimmer)
+	modifiers.extend(modifiers_both)
+
+	# For paired-end data, create a second processing pipeline.
+	# However, if no second-read adapters were given (via -A/-G/-B/-U), we need to
+	# be backwards compatible and *no modifications* are done to the second read.
+	modifiers2 = []
+	if paired == 'both':
+		if options.cut2:
+			if len(options.cut2) > 2:
+				parser.error("You cannot remove bases from more than two ends.")
+			if len(options.cut2) == 2 and options.cut2[0] * options.cut2[1] > 0:
+				parser.error("You cannot remove bases from the same end twice.")
+			for cut in options.cut2:
+				if cut != 0:
+					modifiers2.append(UnconditionalCutter(cut))
+
+		if cutoffs:
+			modifiers2.append(QualityTrimmer(cutoffs[0], cutoffs[1], options.quality_base))
+		if adapters2:
+			adapter_cutter2 = AdapterCutter(adapters2, options.times, None, None, None, options.action)
+			modifiers2.append(adapter_cutter2)
+		else:
+			adapter_cutter2 = None
+		modifiers2.extend(modifiers_both)
+
+	logger.info("This is cutadapt %s with Python %s", __version__, platform.python_version())
+	logger.info("Command line parameters: %s", " ".join(cmdlineargs))
+	logger.info("Trimming %s adapter%s with at most %.1f%% errors in %s mode ...",
+		len(adapters) + len(adapters2), 's' if len(adapters) + len(adapters2) != 1 else '',
+		options.error_rate * 100,
+		{ False: 'single-end', 'first': 'paired-end legacy', 'both': 'paired-end' }[paired])
+
+	if paired == 'first' and (modifiers_both or cutoffs):
+		logger.warning('\n'.join(textwrap.wrap('WARNING: Requested read '
+			'modifications are applied only to the first '
+			'read since backwards compatibility mode is enabled. '
+			'To modify both reads, also use any of the -A/-B/-G/-U options. '
+			'Use a dummy adapter sequence when necessary: -A XXX')))
+
+	clock = getattr(time, 'clock', None) or time.process_time  # time.clock() was removed in Python 3.8
+	start_time = clock()
+	try:
+		if paired:
+			stats = process_paired_reads(reader, modifiers, modifiers2, filters)
+		else:
+			stats = process_single_reads(reader, modifiers, filters)
+	except KeyboardInterrupt:
+		print("Interrupted", file=sys.stderr)
+		sys.exit(130)
+	except IOError as e:
+		if e.errno == errno.EPIPE:
+			sys.exit(1)
+		raise
+	except (seqio.FormatError, EOFError) as e:
+		sys.exit("cutadapt: error: {0}".format(e))
+
+	# close open files
+	for f in [writer, untrimmed_writer,
+			options.rest_file, options.wildcard_file, options.info_file,
+			too_short_writer, too_long_writer, demultiplexer]:
+		if f is not None and f is not sys.stdin and f is not sys.stdout:
+			f.close()
+
+	elapsed_time = clock() - start_time
+	if not options.quiet:
+		stats.collect((adapters, adapters2), elapsed_time, modifiers, modifiers2, filters)
+		# send statistics to stderr if result was sent to stdout
+		stat_file = sys.stderr if options.output is None else None
+		with redirect_standard_output(stat_file):
+			print_report(stats, (adapters, adapters2))
+
+
+if __name__ == '__main__':  # run the command-line tool when this file is executed as a script
+	main()
diff --git a/cutadapt/seqio.py b/cutadapt/seqio.py
new file mode 100644
index 0000000..28d6722
--- /dev/null
+++ b/cutadapt/seqio.py
@@ -0,0 +1,756 @@
+# coding: utf-8
+"""
+Sequence I/O classes: Reading and writing of FASTA and FASTQ files.
+
+TODO
+
+- Sequence.name should be Sequence.description or so (reserve .name for the part
+  before the first space)
+"""
+from __future__ import print_function, division, absolute_import
+import sys
+from os.path import splitext
+from .xopen import xopen
+from .compat import zip, basestring
+
+__author__ = "Marcel Martin"
+
+
+class FormatError(Exception):
+	"""
+	Raised when an input file (FASTA or FASTQ) is malformed.
+	"""
+
+
+def _shorten(s, n=100):
+	"""Return s cut down to at most n characters; a cut is marked by a trailing "..."."""
+	if s is not None and len(s) > n:
+		# Three of the n characters are taken up by the ellipsis marker
+		return s[:n-3] + '...'
+	# None and strings that already fit are handed back as they are
+	return s
+
+
+class Sequence(object):
+	"""A read: name, sequence and optional qualities (a string encoded as ascii(qual+33))."""
+
+	def __init__(self, name, sequence, qualities=None, name2='', match=None, match_info=None):
+		"""Store all fields. Pass qualities=None if there are no quality values."""
+		self.name, self.name2 = name, name2
+		self.sequence, self.qualities = sequence, qualities
+		self.match, self.match_info = match, match_info
+		self.original_length = len(sequence)
+		# Qualities, if present, must have exactly one value per character of the read
+		if qualities is not None and len(qualities) != len(sequence):
+			raise FormatError("In read named {0!r}: Length of quality sequence ({1}) and "
+				"length of read ({2}) do not match".format(
+					_shorten(name), len(qualities), len(sequence)))
+
+	def __getitem__(self, key):
+		"""Return a new instance of the same class whose sequence and qualities are sliced by key."""
+		sliced_sequence = self.sequence[key]
+		sliced_qualities = None
+		if self.qualities is not None:
+			sliced_qualities = self.qualities[key]
+		return self.__class__(self.name, sliced_sequence, sliced_qualities,
+			self.name2, self.match, self.match_info)
+
+	def __repr__(self):
+		"""Debugging representation; long fields are abbreviated with _shorten()."""
+		if self.qualities is None:
+			qstr = ''
+		else:
+			qstr = ', qualities={0!r}'.format(_shorten(self.qualities))
+		return '<Sequence(name={0!r}, sequence={1!r}{2})>'.format(
+			_shorten(self.name), _shorten(self.sequence), qstr)
+
+	def __len__(self):
+		"""Length of the sequence string."""
+		return len(self.sequence)
+
+	def __eq__(self, other):
+		"""Compare name, sequence and qualities; the other attributes are not looked at."""
+		return (self.name == other.name and
+			self.sequence == other.sequence and
+			self.qualities == other.qualities)
+
+	def __ne__(self, other):
+		return not self.__eq__(other)
+
+
+class SequenceReader(object):
+	"""Base class of the readers: wraps a possibly compressed file containing sequences."""
+	_close_on_exit = False  # overridden per instance when the reader opens the file itself
+
+	def __init__(self, file):
+		"""file is a path or a file-like object; in both cases it may be compressed (.gz, .bz2, .xz)."""
+		if isinstance(file, basestring):
+			# A path was given: open it here and remember to close it again
+			self._file, self._close_on_exit = xopen(file), True
+		else:
+			self._file = file
+
+	def close(self):
+		"""Close the underlying file if this reader opened it and it is not closed already."""
+		if not self._close_on_exit or self._file is None:
+			return
+		self._file.close()
+		self._file = None
+
+	def __enter__(self):
+		if self._file is None:
+			raise ValueError("I/O operation on closed SequenceReader")
+		return self
+
+	def __exit__(self, *args):
+		self.close()
+
+
+try:
+	from ._seqio import Sequence
+except ImportError:
+	pass  # the _seqio extension module is unavailable: keep the pure-Python Sequence defined above
+
+
+class ColorspaceSequence(Sequence):
+	"""A colorspace read: a Sequence that additionally carries the primer base."""
+	def __init__(self, name, sequence, qualities, primer=None, name2='', match=None, match_info=None):
+		# In colorspace, the first character is the last nucleotide of the primer base
+		# and the second character encodes the transition from the primer base to the
+		# first real base of the read.
+		if primer is None:
+			# No explicit primer given: split it off the front of the sequence
+			primer, sequence = sequence[0:1], sequence[1:]
+		self.primer = primer
+		if qualities is not None and len(sequence) != len(qualities):
+			raise FormatError("In read named {0!r}: length of colorspace quality "
+				"sequence ({1}) and length of read ({2}) do not match (primer "
+				"is: {3!r})".format(_shorten(name), len(qualities), len(sequence), self.primer))
+		super(ColorspaceSequence, self).__init__(name, sequence, qualities, name2, match, match_info)
+		if self.primer not in ('A', 'C', 'G', 'T'):
+			raise FormatError("Primer base is {0!r} in read {1!r}, but it "
+				"should be one of A, C, G, T.".format(
+					self.primer, _shorten(name)))
+
+	def __repr__(self):
+		"""Debugging representation that also shows the primer."""
+		if self.qualities is None:
+			qstr = ''
+		else:
+			qstr = ', qualities={0!r}'.format(_shorten(self.qualities))
+		return '<ColorspaceSequence(name={0!r}, primer={1!r}, sequence={2!r}{3})>'.format(
+			_shorten(self.name), self.primer, _shorten(self.sequence), qstr)
+
+	def __getitem__(self, key):
+		"""Slice sequence and qualities by key; the primer is passed on unchanged."""
+		sliced_sequence = self.sequence[key]
+		sliced_qualities = None
+		if self.qualities is not None:
+			sliced_qualities = self.qualities[key]
+		return self.__class__(self.name, sliced_sequence, sliced_qualities,
+			self.primer, self.name2, self.match, self.match_info)
+
+
+def sra_colorspace_sequence(name, sequence, qualities, name2):
+	"""Build a ColorspaceSequence from an SRA record, dropping its surplus first quality value."""
+	return ColorspaceSequence(name=name, sequence=sequence, qualities=qualities[1:], name2=name2)
+
+
+class FileWithPrependedLine(object):
+	"""
+	File-like wrapper that puts one extra line in front of an
+	already opened file: iterating over the wrapper yields
+	the given line first and the file's own lines after it.
+
+	This solves a problem when autodetecting the format of
+	streamed input: the file type is known as soon as the
+	first line has been read, but by then that line is gone
+	and would not be available for further processing.
+	"""
+	def __init__(self, file, line):
+		"""
+		file is an already opened file-like object.
+		line is a single string (newline will be appended if not included)
+		"""
+		# Make the prepended line end in a newline like a line read from a file
+		self.first_line = line if line.endswith('\n') else line + '\n'
+		self._file = file
+
+	def __iter__(self):
+		"""Yield the prepended line, then every line of the wrapped file."""
+		yield self.first_line
+		for following in self._file:
+			yield following
+
+	def close(self):
+		"""Close the wrapped file."""
+		self._file.close()
+
+class FastaReader(SequenceReader):
+	"""
+	Reader for FASTA files.
+	"""
+	def __init__(self, file, keep_linebreaks=False, sequence_class=Sequence):
+		"""
+		file is a path or a file-like object. In both cases, the file may
+		be compressed (.gz, .bz2, .xz).
+
+		keep_linebreaks -- whether to keep newline characters in the sequence
+		sequence_class -- called as sequence_class(name, sequence, None) per record
+		"""
+		super(FastaReader, self).__init__(file)
+		self._delimiter = '\n' if keep_linebreaks else ''
+		self.delivers_qualities = False
+		self.sequence_class = sequence_class
+
+	def __iter__(self):
+		"""
+		Yield one sequence_class instance per FASTA record.
+		"""
+		name = None
+		seq = []
+		for lineno, line in enumerate(self._file, 1):
+			# strip() also removes DOS line breaks
+			line = line.strip()
+			if not line or line.startswith('#'):
+				# Empty lines and lines starting with '#' are skipped
+				continue
+			if line.startswith('>'):
+				if name is not None:
+					yield self.sequence_class(name, self._delimiter.join(seq), None)
+				name, seq = line[1:], []
+			elif name is not None:
+				seq.append(line)
+			else:
+				raise FormatError("At line {0}: Expected '>' at beginning of "
+					"FASTA record, but got {1!r}.".format(lineno, _shorten(line)))
+
+		# Emit the record that was still being collected when the input ended
+		if name is not None:
+			yield self.sequence_class(name, self._delimiter.join(seq), None)
+
+
+class ColorspaceFastaReader(FastaReader):  # a FastaReader whose sequence_class is fixed to ColorspaceSequence
+	def __init__(self, file, keep_linebreaks=False):
+		super(ColorspaceFastaReader, self).__init__(file, keep_linebreaks, sequence_class=ColorspaceSequence)
+
+
+class FastqReader(SequenceReader):
+	"""
+	Reader for FASTQ files. Does not support multi-line FASTQ files.
+	"""
+	def __init__(self, file, sequence_class=Sequence): # TODO could be a class attribute
+		"""
+		file is a path or a file-like object. compressed files are supported.
+
+		sequence_class is called once per record to build the object that is
+		yielded; Sequence and ColorspaceSequence are suitable.
+		"""
+		super(FastqReader, self).__init__(file)
+		self.delivers_qualities = True
+		self.sequence_class = sequence_class
+
+	def __iter__(self):
+		"""
+		Yield one sequence_class instance per four-line FASTQ record.
+		The quality string is passed on as found in the file, still encoded.
+		"""
+		i = 3  # so that an empty file passes the completeness check after the loop
+		for i, line in enumerate(self._file):
+			phase = i % 4
+			if phase == 0:
+				if not line.startswith('@'):
+					raise FormatError("Line {0} in FASTQ file is expected to start with '@', "
+						"but found {1!r}".format(i+1, line[:10]))
+				name = line.strip()[1:]
+			elif phase == 1:
+				sequence = line.strip()
+			elif phase == 2:
+				line = line.strip()
+				if not line.startswith('+'):
+					raise FormatError("Line {0} in FASTQ file is expected to start with '+', "
+						"but found {1!r}".format(i+1, line[:10]))
+				# The description may be repeated after the '+', but then it must be identical
+				repeated = line[1:]
+				if repeated and repeated != name:
+					raise FormatError(
+						"At line {0}: Sequence descriptions in the FASTQ file do not match "
+						"({1!r} != {2!r}).\n"
+						"The second sequence description must be either empty "
+						"or equal to the first description.".format(
+							i+1, name, repeated.rstrip()))
+				name2 = name if repeated else ''
+			else:
+				qualities = line.rstrip('\n\r')
+				yield self.sequence_class(name, sequence, qualities, name2=name2)
+		if i % 4 != 3:
+			raise FormatError("FASTQ file ended prematurely")
+
+
+try:
+	from ._seqio import FastqReader
+except ImportError:
+	pass
+
+
+class ColorspaceFastqReader(FastqReader):  # FastqReader that builds ColorspaceSequence records
+	def __init__(self, file):
+		super(ColorspaceFastqReader, self).__init__(file, sequence_class=ColorspaceSequence)
+
+
+class SRAColorspaceFastqReader(FastqReader):  # FastqReader using sra_colorspace_sequence (not defined in this section) as record factory
+	def __init__(self, file):
+		super(SRAColorspaceFastqReader, self).__init__(file, sequence_class=sra_colorspace_sequence)
+
+
+class FastaQualReader(object):
+	"""
+	Reader for reads that are stored in .(CS)FASTA and .QUAL files.
+	"""
+	delivers_qualities = True
+
+	def __init__(self, fastafile, qualfile, sequence_class=Sequence):
+		"""
+		fastafile and qualfile are filenames or file-like objects.
+		If a filename is used, then .gz files are recognized.
+
+		The objects returned when iterating over this file are instances of the
+		given sequence_class.
+		"""
+		self.fastareader = FastaReader(fastafile)
+		self.qualreader = FastaReader(qualfile, keep_linebreaks=True)
+		self.sequence_class = sequence_class
+
+	def __iter__(self):
+		"""
+		Yield sequence_class objects. Raise FormatError on a name mismatch or bad quality value.
+		"""
+		# conversion dictionary: maps strings to the appropriate ASCII-encoded character
+		conv = dict()
+		for i in range(-5, 256 - 33):  # quality values -5..222 map to chr(value + 33)
+			conv[str(i)] = chr(i + 33)
+		for fastaread, qualread in zip(self.fastareader, self.qualreader):  # NOTE(review): zip() stops at the shorter input, so unequal record counts are not reported here
+			if fastaread.name != qualread.name:
+				raise FormatError("The read names in the FASTA and QUAL file "
+					"do not match ({0!r} != {1!r})".format(fastaread.name, qualread.name))
+			try:
+				qualities = ''.join([conv[value] for value in qualread.sequence.split()])
+			except KeyError as e:  # a token that is not one of the integers in conv
+				raise FormatError("Within read named {0!r}: Found invalid quality "
+					"value {1}".format(fastaread.name, e))
+			assert fastaread.name == qualread.name
+			yield self.sequence_class(fastaread.name, fastaread.sequence, qualities)
+
+	def close(self):
+		self.fastareader.close()
+		self.qualreader.close()
+
+	def __enter__(self):
+		return self
+
+	def __exit__(self, *args):
+		self.close()
+
+
+class ColorspaceFastaQualReader(FastaQualReader):  # FastaQualReader that builds ColorspaceSequence records
+	def __init__(self, fastafile, qualfile):
+		super(ColorspaceFastaQualReader, self).__init__(fastafile, qualfile, sequence_class=ColorspaceSequence)
+
+
+def sequence_names_match(r1, r2):
+	"""
+	Return True if r1 and r2 have identical names up to the first whitespace,
+	ignoring a final '1' or '2' when both names end in one (covers '/1' and
+	'/2' in old paired-end reads and the .1/.2 that fastq-dump -I appends).
+	Empty or whitespace-only names are compared as empty strings.
+	"""
+	name1 = (r1.name.split(None, 1) or [''])[0]  # split() returns [] for an empty name
+	name2 = (r2.name.split(None, 1) or [''])[0]
+	if name1[-1:] in '12' and name2[-1:] in '12':
+		name1 = name1[:-1]
+		name2 = name2[:-1]
+	return name1 == name2
+
+
+class PairedSequenceReader(object):
+	"""
+	Read paired-end reads from two files.
+
+	Wraps two SequenceReader instances, making sure that reads are properly
+	paired.
+	"""
+	def __init__(self, file1, file2, colorspace=False, fileformat=None):
+		self.reader1 = open(file1, colorspace=colorspace, fileformat=fileformat)  # open() is this module's factory, not the builtin
+		self.reader2 = open(file2, colorspace=colorspace, fileformat=fileformat)
+		self.delivers_qualities = self.reader1.delivers_qualities  # taken from reader 1 only
+
+	def __iter__(self):
+		"""
+		Yield (r1, r2) pairs of reads. Raise FormatError on a count or name mismatch.
+		"""
+		# Avoid usage of zip() below since it will consume one item too many.
+		it1, it2 = iter(self.reader1), iter(self.reader2)
+		while True:
+			try:
+				r1 = next(it1)
+			except StopIteration:
+				# End of file 1. Make sure that file 2 is also at end.
+				try:
+					next(it2)
+					raise FormatError("Reads are improperly paired. There are more reads in "  # reached only if file 2 still had a read
+						"file 2 than in file 1.")
+				except StopIteration:
+					pass
+				break
+			try:
+				r2 = next(it2)
+			except StopIteration:
+				raise FormatError("Reads are improperly paired. There are more reads in "
+					"file 1 than in file 2.")
+			if not sequence_names_match(r1, r2):
+				raise FormatError("Reads are improperly paired. Read name '{0}' "
+					"in file 1 does not match '{1}' in file 2.".format(r1.name, r2.name))
+			yield (r1, r2)
+
+	def close(self):
+		self.reader1.close()
+		self.reader2.close()
+
+	def __enter__(self):
+		return self
+
+	def __exit__(self, *args):
+		self.close()
+
+
+class InterleavedSequenceReader(object):
+	"""
+	Read paired-end reads from a single interleaved file (opened via open()).
+	"""
+	def __init__(self, file, colorspace=False, fileformat=None):
+		self.reader = open(file, colorspace=colorspace, fileformat=fileformat)  # open() is this module's factory, not the builtin
+		self.delivers_qualities = self.reader.delivers_qualities
+
+	def __iter__(self):
+		# Records alternate first/second read, so each loop step pulls its partner with next().
+		it = iter(self.reader)
+		for r1 in it:
+			try:
+				r2 = next(it)
+			except StopIteration:  # odd number of records in the file
+				raise FormatError("Interleaved input file incomplete: Last record has no partner.")
+			if not sequence_names_match(r1, r2):
+				raise FormatError("Reads are improperly paired. Name {0!r} "
+					"(first) does not match {1!r} (second).".format(r1.name, r2.name))
+			yield (r1, r2)
+
+	def close(self):
+		self.reader.close()
+
+	def __enter__(self):
+		return self
+
+	def __exit__(self, *args):
+		self.close()
+
+class FileWriter(object):  # base class holding the output file handle of a writer
+	def __init__(self, file):
+		if isinstance(file, basestring):  # a path (str, or unicode on Python 2): open it ourselves
+			self._file = xopen(file, 'w')
+			self._close_on_exit = True
+		else:
+			self._file = file  # caller-supplied file object; close() leaves it open
+			self._close_on_exit = False
+	
+	def close(self):
+		if self._close_on_exit:  # only close files this object opened itself
+			self._file.close()
+	
+	def __enter__(self):
+		if self._file.closed:
+			raise ValueError("I/O operation on closed file")
+		return self
+
+	def __exit__(self, *args):
+		self.close()
+
+class SingleRecordWriter(object):
+	"""Public interface to single-record files"""
+	def write(self, record):  # interface stub; FastaWriter and FastqWriter provide implementations
+		raise NotImplementedError()
+
+class FastaWriter(FileWriter, SingleRecordWriter):
+	"""
+	Write FASTA-formatted sequences to a file.
+	"""
+
+	def __init__(self, file, line_length=None):
+		"""
+		If line_length is neither None nor 0, sequence lines
+		are wrapped after line_length characters.
+		"""
+		FileWriter.__init__(self, file)
+		self.line_length = line_length if line_length != 0 else None
+	
+	def write(self, name_or_seq, sequence=None):
+		"""Write an entry to the FASTA file.
+
+		If only one parameter (name_or_seq) is given, it must have
+		attributes .name and .sequence, which are then used.
+		Otherwise, the first parameter must be the name and the second
+		the sequence.
+
+		The effect is that you can write this:
+		writer.write("name", "ACCAT")
+		or
+		writer.write(Sequence("name", "ACCAT"))
+		"""
+		if sequence is None:
+			name = name_or_seq.name
+			sequence = name_or_seq.sequence
+		else:
+			name = name_or_seq
+		
+		if self.line_length is not None:
+			print('>{0}'.format(name), file=self._file)
+			for i in range(0, len(sequence), self.line_length):
+				print(sequence[i:i+self.line_length], file=self._file)
+			if len(sequence) == 0:  # the loop above printed nothing; still emit an (empty) sequence line
+				print(file=self._file)
+		else:
+			print('>{0}'.format(name), sequence, file=self._file, sep='\n')
+
+class ColorspaceFastaWriter(FastaWriter):  # FASTA writer for records that carry a .primer attribute
+	def write(self, record):
+		name = record.name
+		sequence = record.primer + record.sequence  # the primer is written in front of the sequence
+		super(ColorspaceFastaWriter, self).write(name, sequence)
+
+class FastqWriter(FileWriter, SingleRecordWriter):
+	"""
+	Write sequences with qualities in FASTQ format.
+
+	FASTQ files are formatted like this:
+	@read name
+	SEQUENCE
+	+
+	QUALITIES
+	"""
+	def write(self, record):
+		"""
+		Write a Sequence record to the FASTQ file.
+
+		The record must have attributes .name, .sequence, .name2 and .qualities.
+		"""
+		s = ('@' + record.name + '\n' + record.sequence + '\n+' +
+				record.name2 + '\n' + record.qualities + '\n')
+		self._file.write(s)
+
+	def writeseq(self, name, sequence, qualities):  # writes a bare '+' separator line (no repeated name)
+		print("@{0:s}\n{1:s}\n+\n{2:s}".format(
+			name, sequence, qualities), file=self._file)
+
+class ColorspaceFastqWriter(FastqWriter):  # FASTQ writer for records that carry a .primer attribute
+	def write(self, record):
+		name = record.name
+		sequence = record.primer + record.sequence  # the primer is written in front of the sequence
+		qualities = record.qualities
+		super(ColorspaceFastqWriter, self).writeseq(name, sequence, qualities)  # writeseq() does not output record.name2
+
+class PairRecordWriter(object):
+	"""Public interface to paired-record files"""
+	def write(self, read1, read2):  # interface stub; overridden by the paired writers in this module
+		raise NotImplementedError()
+	def close(self):  # interface stub; overridden by the paired writers in this module
+		raise NotImplementedError()
+	
+	def __enter__(self):
+		# TODO do not allow this twice
+		return self
+
+	def __exit__(self, *args):  # leaving the with-block always calls close()
+		self.close()
+
+class PairedSequenceWriter(PairRecordWriter):  # writes read 1 and read 2 of each pair to two separate files
+	def __init__(self, file1, file2, colorspace=False, fileformat='fastq', qualities=None):
+		self._writer1 = open(file1, colorspace=colorspace, fileformat=fileformat, mode='w',  # open() is this module's factory, not the builtin
+			qualities=qualities)
+		self._writer2 = open(file2, colorspace=colorspace, fileformat=fileformat, mode='w',
+			qualities=qualities)
+
+	def write(self, read1, read2):
+		self._writer1.write(read1)
+		self._writer2.write(read2)
+
+	def close(self):
+		self._writer1.close()
+		self._writer2.close()
+
+class InterleavedSequenceWriter(PairRecordWriter):
+	"""
+	Write paired-end reads to an interleaved FASTA or FASTQ file
+	"""
+	def __init__(self, file, colorspace=False, fileformat='fastq', qualities=None):
+		self._writer = open(file, colorspace=colorspace, fileformat=fileformat, mode='w', qualities=qualities)  # this module's open(), not the builtin
+
+	def write(self, read1, read2):  # read1 is written first, directly followed by read2
+		self._writer.write(read1)
+		self._writer.write(read2)
+
+	def close(self):
+		self._writer.close()
+
+class UnknownFileType(Exception):
+	"""
+	Raised when open() cannot determine the file type or the requested format is unknown.
+	"""
+
+
+def open(file1, file2=None, qualfile=None, colorspace=False, fileformat=None,
+	interleaved=False, mode='r', qualities=None):
+	"""
+	Open sequence files in FASTA or FASTQ format for reading or writing. This is
+	a factory that returns an instance of one of the ...Reader or ...Writer
+	classes also defined in this module.
+
+	file1, file2, qualfile -- Paths to regular or compressed files or file-like
+		objects. Use file1 if data is single-end. If also file2 is provided,
+		sequences are paired. If qualfile is given, then file1 must be a FASTA
+		file and sequences are single-end. One of file2 and qualfile must always
+		be None (no paired-end data is supported when reading qualfiles).
+
+	mode -- Either 'r' for reading or 'w' for writing.
+
+	interleaved -- If True, then file1 contains interleaved paired-end data.
+		file2 and qualfile must be None in this case.
+
+	colorspace -- If True, instances of the Colorspace... classes
+		are returned.
+
+	fileformat -- If set to None, file format is autodetected from the file name
+		extension. Set to 'fasta', 'fastq', or 'sra-fastq' to not auto-detect.
+		Colorspace is not auto-detected and must always be requested explicitly.
+
+	qualities -- When mode is 'w' and fileformat is None, this can be set to
+		True or False to specify whether the written sequences will have quality
+		values. This is used in two ways:
+		* If the output format cannot be determined (unrecognized extension
+		  etc), no exception is raised, but fasta or fastq format is chosen
+		  appropriately.
+		* When False (no qualities available), an exception is raised when the
+		  auto-detected output format is FASTQ.
+	"""
+	if mode not in ('r', 'w'):
+		raise ValueError("Mode must be 'r' or 'w'")
+	if interleaved and (file2 is not None or qualfile is not None):
+		raise ValueError("When interleaved is set, file2 and qualfile must be None")
+	if file2 is not None and qualfile is not None:
+		raise ValueError("Setting both file2 and qualfile is not supported")
+	if file2 is not None:  # paired-end data in two files
+		if mode == 'r':
+			return PairedSequenceReader(file1, file2, colorspace, fileformat)
+		else:
+			return PairedSequenceWriter(file1, file2, colorspace, fileformat, qualities)
+
+	if interleaved:  # paired-end data in a single file
+		if mode == 'r':
+			return InterleavedSequenceReader(file1, colorspace, fileformat)
+		else:
+			return InterleavedSequenceWriter(file1, colorspace, fileformat, qualities)
+
+	if qualfile is not None:  # FASTA plus separate quality file (read-only)
+		if mode == 'w':
+			raise NotImplementedError('Writing to csfasta/qual not supported')
+		if colorspace:
+			# read from .(CS)FASTA/.QUAL
+			return ColorspaceFastaQualReader(file1, qualfile)
+		else:
+			return FastaQualReader(file1, qualfile)
+
+	# All the multi-file things have been dealt with, delegate rest to the
+	# single-file function.
+	return _seqopen1(file1, colorspace=colorspace, fileformat=fileformat,
+		mode=mode, qualities=qualities)
+
+
+def _seqopen1(file, colorspace=False, fileformat=None, mode='r', qualities=None):
+	"""
+	Open a single sequence file. See the docstring of open() for the parameters.
+	"""
+	if mode == 'r':
+		fastq_handler = ColorspaceFastqReader if colorspace else FastqReader
+		fasta_handler = ColorspaceFastaReader if colorspace else FastaReader
+	elif mode == 'w':
+		fastq_handler = ColorspaceFastqWriter if colorspace else FastqWriter
+		fasta_handler = ColorspaceFastaWriter if colorspace else FastaWriter
+	else:
+		raise ValueError("Mode must be 'r' or 'w'")
+
+	if fileformat:  # Explicit file format given
+		fileformat = fileformat.lower()
+		if fileformat == 'fasta':
+			return fasta_handler(file)
+		elif fileformat == 'fastq':
+			return fastq_handler(file)
+		elif fileformat == 'sra-fastq' and colorspace:
+			if mode == 'w':
+				raise NotImplementedError('Writing to sra-fastq not supported')
+			return SRAColorspaceFastqReader(file)
+		else:
+			raise UnknownFileType("File format {0!r} is unknown (expected "
+				"'sra-fastq' (only for colorspace), 'fasta' or 'fastq').".format(fileformat))
+
+	# Detect file format
+	name = None
+	if file == "-":  # '-' selects stdin/stdout; name stays None, so no extension check is done
+		file = sys.stdin if mode == 'r' else sys.stdout
+	elif isinstance(file, basestring):
+		name = file
+	elif hasattr(file, "name"):  # seems to be an open file-like object
+		name = file.name
+
+	if name:
+		for ext in ('.gz', '.xz', '.bz2'):  # drop one compression suffix before looking at the extension
+			if name.endswith(ext):
+				name = name[:-len(ext)]
+				break
+		name, ext = splitext(name)
+		ext = ext.lower()
+		if ext in ['.fasta', '.fa', '.fna', '.csfasta', '.csfa']:
+			format = 'fasta'
+		elif ext in ['.fastq', '.fq'] or (ext == '.txt' and name.endswith('_sequence')):
+			format = 'fastq'
+		elif mode == 'w' and qualities is True:
+			# Format not recognized, but know we want to write reads with qualities
+			format = 'fastq'
+		elif mode == 'w' and qualities is False:
+			# Same, but we know that we want to write reads without qualities
+			format = 'fasta'
+		else:
+			raise UnknownFileType("Could not determine whether file {0!r} is FASTA "
+				"or FASTQ: file name extension {1!r} not recognized".format(file, ext))
+		if format == 'fastq' and qualities is False:
+			raise ValueError("Output format cannot be FASTQ since no quality "
+				"values are available.")
+		if format == 'fastq':
+			return fastq_handler(file)
+		else:
+			return fasta_handler(file)
+
+	if mode == 'w':  # no usable name: the qualities flag alone decides the output format
+		if qualities is True:
+			return fastq_handler(file)
+		elif qualities is False:
+			return fasta_handler(file)
+		raise UnknownFileType('Cannot determine whether to write in FASTA or '
+			'FASTQ format')
+	# No name available. Try to autodetect type by reading from the file.
+	for line in file:
+		if line.startswith('#'):
+			# Skip comment lines (needed for csfasta)
+			continue
+		if line.startswith('>'):
+			return fasta_handler(FileWithPrependedLine(file, line))  # hand the consumed line back to the reader
+		if line.startswith('@'):
+			return fastq_handler(FileWithPrependedLine(file, line))
+	raise UnknownFileType("File is neither FASTQ nor FASTA.")
diff --git a/cutadapt/xopen.py b/cutadapt/xopen.py
new file mode 100644
index 0000000..c1b8c90
--- /dev/null
+++ b/cutadapt/xopen.py
@@ -0,0 +1,182 @@
+"""
+Open compressed files transparently.
+"""
+from __future__ import print_function, division, absolute_import
+__author__ = 'Marcel Martin'
+
+import gzip
+import sys
+import io
+import os
+from subprocess import Popen, PIPE
+from .compat import PY3, basestring
+
+try:
+	import bz2
+except ImportError:
+	bz2 = None
+
+try:
+	import lzma
+except ImportError:
+	lzma = None
+
+if sys.version_info < (2, 7):
+	buffered_reader = lambda x: x
+	buffered_writer = lambda x: x
+else:
+	buffered_reader = io.BufferedReader
+	buffered_writer = io.BufferedWriter
+
+
+class GzipWriter:  # writes through an external 'gzip' process whose stdout is the target file
+	def __init__(self, path, mode='w'):
+		self.outfile = open(path, mode)
+		self.devnull = open(os.devnull, 'w')
+		try:
+			# Setting close_fds to True is necessary due to
+			# http://bugs.python.org/issue12786
+			self.process = Popen(['gzip'], stdin=PIPE, stdout=self.outfile,
+				stderr=self.devnull, close_fds=True)
+		except (IOError, OSError):  # Python 2 Popen raises OSError when gzip cannot be executed
+			self.outfile.close()
+			self.devnull.close()
+			raise
+
+	def write(self, arg):  # data goes to gzip's stdin; gzip writes the compressed bytes to outfile
+		self.process.stdin.write(arg)
+
+	def close(self):
+		self.process.stdin.close()  # signals end of input to gzip
+		retcode = self.process.wait()
+		self.outfile.close()
+		self.devnull.close()
+		if retcode != 0:
+			raise IOError("Output gzip process terminated with exit code {0}".format(retcode))
+
+	def __enter__(self):
+		return self
+
+	def __exit__(self, *exc_info):
+		self.close()
+
+
+class GzipReader:  # reads the output of an external 'gzip -cd <path>' process
+	def __init__(self, path):
+		self.process = Popen(['gzip', '-cd', path], stdout=PIPE)
+
+	def close(self):
+		retcode = self.process.poll()
+		if retcode is None:
+			# still running
+			self.process.terminate()
+		self._raise_if_error()
+
+	def __iter__(self):
+		for line in self.process.stdout:
+			yield line
+		self.process.wait()  # all output consumed: wait so the exit code can be checked
+		self._raise_if_error()
+
+	def _raise_if_error(self):
+		"""
+		Raise EOFError if process is not running anymore and the
+		exit code is nonzero.
+		"""
+		retcode = self.process.poll()
+		if retcode is not None and retcode != 0:
+			raise EOFError("gzip process returned non-zero exit code {0}. Is the "
+				"input file truncated or corrupt?".format(retcode))
+
+	def read(self, *args):  # same arguments as file.read(); returns the data that was read
+		data = self.process.stdout.read(*args)
+		if len(args) == 0 or args[0] <= 0:
+			self.process.wait()  # everything was requested: wait so the exit code can be checked
+		self._raise_if_error()
+		return data
+
+	def __enter__(self):
+		return self
+
+	def __exit__(self, *exc_info):
+		self.close()
+
+
+def xopen(filename, mode='r'):
+	"""
+	Replacement for the "open" function that can also open files that have
+	been compressed with gzip, bzip2 or xz. If the filename is '-', standard
+	output (mode 'w') or input (mode 'r') is returned. If the filename ends
+	with .gz, then on Python 2 the file is opened with a pipe to the gzip
+	program, falling back to gzip.open() if that does not work (the gzip
+	module is slower); Python 3 always uses gzip.open(). A .bz2 file is opened
+	as a bz2.BZ2File, a .xz file with lzma.open(). Otherwise open() is used.
+
+	mode can be: 'rt', 'rb', 'a', 'wt', or 'wb'
+	Instead of 'rt' and 'wt', 'r' and 'w' can be used as abbreviations.
+
+	In Python 2, the 't' and 'b' characters are ignored.
+
+	Append mode ('a') is unavailable with BZ2 compression and will raise an error.
+	"""
+	if mode == 'r':
+		mode = 'rt'
+	elif mode == 'w':
+		mode = 'wt'
+	if mode not in ('rt', 'rb', 'wt', 'wb', 'a'):
+		raise ValueError("mode '{0}' not supported".format(mode))
+	if not PY3:
+		mode = mode[0]  # Python 2: keep only 'r', 'w' or 'a'
+	if not isinstance(filename, basestring):
+		raise ValueError("the filename must be a string")
+
+	# standard input and standard output handling
+	if filename == '-':
+		if not PY3:
+			return sys.stdin if 'r' in mode else sys.stdout
+		return dict(
+			rt=sys.stdin,
+			wt=sys.stdout,
+			rb=sys.stdin.buffer,
+			wb=sys.stdout.buffer)[mode]  # NOTE(review): mode 'a' is not in this table and raises KeyError
+
+	if filename.endswith('.bz2'):
+		if bz2 is None:
+			raise ImportError("Cannot open bz2 files: The bz2 module is not available")
+		if PY3:
+			if 't' in mode:
+				return io.TextIOWrapper(bz2.BZ2File(filename, mode[0]))
+			else:
+				return bz2.BZ2File(filename, mode)
+		else:
+			return bz2.BZ2File(filename, mode)
+	elif filename.endswith('.xz'):
+		if lzma is None:
+			raise ImportError("Cannot open xz files: The lzma module is not available "
+				"(use Python 3.3 or newer)")
+		return lzma.open(filename, mode)
+	elif filename.endswith('.gz'):
+		if PY3:
+			if 't' in mode:
+				# gzip.open in Python 3.2 does not support modes 'rt' and 'wt'
+				return io.TextIOWrapper(gzip.open(filename, mode[0]))
+			else:
+				if 'r' in mode:
+					return io.BufferedReader(gzip.open(filename, mode))
+				else:
+					return io.BufferedWriter(gzip.open(filename, mode))
+		else:
+			# rb/rt are equivalent in Py2
+			if 'r' in mode:
+				try:
+					return GzipReader(filename)
+				except (IOError, OSError):
+					# gzip not installed (Python 2 Popen raises OSError, not IOError)
+					return buffered_reader(gzip.open(filename, mode))
+			else:
+				try:
+					return GzipWriter(filename, mode)
+				except (IOError, OSError):  # gzip not installed, see above
+					return buffered_writer(gzip.open(filename, mode))
+	else:
+		return open(filename, mode)
diff --git a/doc/Makefile b/doc/Makefile
new file mode 100644
index 0000000..d5b1f21
--- /dev/null
+++ b/doc/Makefile
@@ -0,0 +1,179 @@
+# Makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line.
+SPHINXOPTS    =
+SPHINXBUILD   = sphinx-build
+PAPER         =
+BUILDDIR      = _build
+
+# User-friendly check for sphinx-build
+ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
+$(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
+endif
+
+# Internal variables.
+PAPEROPT_a4     = -D latex_paper_size=a4
+PAPEROPT_letter = -D latex_paper_size=letter
+ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+# the i18n builder cannot share the environment and doctrees with the others
+I18NSPHINXOPTS  = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
+
+.PHONY: all help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf latexpdfja text man texinfo info gettext changes linkcheck doctest xml pseudoxml
+
+all: html
+
+help:
+	@echo "Please use \`make <target>' where <target> is one of"
+	@echo "  html       to make standalone HTML files"
+	@echo "  dirhtml    to make HTML files named index.html in directories"
+	@echo "  singlehtml to make a single large HTML file"
+	@echo "  pickle     to make pickle files"
+	@echo "  json       to make JSON files"
+	@echo "  htmlhelp   to make HTML files and a HTML help project"
+	@echo "  qthelp     to make HTML files and a qthelp project"
+	@echo "  devhelp    to make HTML files and a Devhelp project"
+	@echo "  epub       to make an epub"
+	@echo "  latex      to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
+	@echo "  latexpdf   to make LaTeX files and run them through pdflatex"
+	@echo "  latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
+	@echo "  text       to make text files"
+	@echo "  man        to make manual pages"
+	@echo "  texinfo    to make Texinfo files"
+	@echo "  info       to make Texinfo files and run them through makeinfo"
+	@echo "  gettext    to make PO message catalogs"
+	@echo "  changes    to make an overview of all changed/added/deprecated items"
+	@echo "  xml        to make Docutils-native XML files"
+	@echo "  pseudoxml  to make pseudoxml-XML files for display purposes"
+	@echo "  linkcheck  to check all external links for integrity"
+	@echo "  doctest    to run all doctests embedded in the documentation (if enabled)"
+
+clean:
+	rm -rf $(BUILDDIR)/*
+
+html:
+	$(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
+
+dirhtml:
+	$(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
+	@echo
+	@echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
+
+singlehtml:
+	$(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
+	@echo
+	@echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
+
+pickle:
+	$(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
+	@echo
+	@echo "Build finished; now you can process the pickle files."
+
+json:
+	$(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
+	@echo
+	@echo "Build finished; now you can process the JSON files."
+
+htmlhelp:
+	$(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
+	@echo
+	@echo "Build finished; now you can run HTML Help Workshop with the" \
+	      ".hhp project file in $(BUILDDIR)/htmlhelp."
+
+qthelp:
+	$(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
+	@echo
+	@echo "Build finished; now you can run "qcollectiongenerator" with the" \
+	      ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
+	@echo "# qcollectiongenerator $(BUILDDIR)/qthelp/cutadapt.qhcp"
+	@echo "To view the help file:"
+	@echo "# assistant -collectionFile $(BUILDDIR)/qthelp/cutadapt.qhc"
+
+devhelp:
+	$(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
+	@echo
+	@echo "Build finished."
+	@echo "To view the help file:"
+	@echo "# mkdir -p $$HOME/.local/share/devhelp/cutadapt"
+	@echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/cutadapt"
+	@echo "# devhelp"
+
+epub:
+	$(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
+	@echo
+	@echo "Build finished. The epub file is in $(BUILDDIR)/epub."
+
+latex:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo
+	@echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
+	@echo "Run \`make' in that directory to run these through (pdf)latex" \
+	      "(use \`make latexpdf' here to do that automatically)."
+
+latexpdf:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo "Running LaTeX files through pdflatex..."
+	$(MAKE) -C $(BUILDDIR)/latex all-pdf
+	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+latexpdfja:
+	$(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
+	@echo "Running LaTeX files through platex and dvipdfmx..."
+	$(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
+	@echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
+
+text:
+	$(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
+	@echo
+	@echo "Build finished. The text files are in $(BUILDDIR)/text."
+
+man:
+	$(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
+	@echo
+	@echo "Build finished. The manual pages are in $(BUILDDIR)/man."
+
+texinfo:
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo
+	@echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
+	@echo "Run \`make' in that directory to run these through makeinfo" \
+	      "(use \`make info' here to do that automatically)."
+
+info:
+	$(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
+	@echo "Running Texinfo files through makeinfo..."
+	$(MAKE) -C $(BUILDDIR)/texinfo info
+	@echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
+
+gettext:
+	$(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
+	@echo
+	@echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
+
+changes:
+	$(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
+	@echo
+	@echo "The overview file is in $(BUILDDIR)/changes."
+
+linkcheck:
+	$(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
+	@echo
+	@echo "Link check complete; look for any errors in the above output " \
+	      "or in $(BUILDDIR)/linkcheck/output.txt."
+
+doctest:
+	$(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
+	@echo "Testing of doctests in the sources finished, look at the " \
+	      "results in $(BUILDDIR)/doctest/output.txt."
+
+xml:
+	$(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
+	@echo
+	@echo "Build finished. The XML files are in $(BUILDDIR)/xml."
+
+pseudoxml:
+	$(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
+	@echo
+	@echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
diff --git a/doc/_static/adapters.svg b/doc/_static/adapters.svg
new file mode 100644
index 0000000..99cf4bd
--- /dev/null
+++ b/doc/_static/adapters.svg
@@ -0,0 +1,259 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   version="1.0"
+   width="500.50909"
+   height="365.63535"
+   id="svg5571">
+  <defs
+     id="defs5573" />
+  <metadata
+     id="metadata5576">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <g
+     transform="translate(-4.4323702,147.9297)"
+     id="layer1">
+    <rect
+       width="35.933102"
+       height="7.0866098"
+       x="111.386"
+       y="-52.720001"
+       id="rect6974"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#b3b3b3;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman;-inkscape-font-specification:'Times New Roman, Bold'" />
+    <rect
+       width="106.299"
+       height="7.0866098"
+       x="5.0866399"
+       y="-52.720001"
+       id="rect3625"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman;-inkscape-font-specification:'Times New Roman, Bold'" />
+    <rect
+       width="141.73199"
+       height="7.0866098"
+       x="5.5865898"
+       y="-52.720001"
+       id="rect5585"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman;-inkscape-font-specification:'Times  [...]
+    <rect
+       width="70.866096"
+       height="7.0866199"
+       x="83.385101"
+       y="-123.586"
+       id="rect6102"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#84b818;fill-opacity:1;fill-rule:nonzero;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman; [...]
+    <rect
+       width="71.020401"
+       height="7.0866299"
+       x="111.732"
+       y="-66.893303"
+       id="rect6104"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#84b818;fill-opacity:1;fill-rule:nonzero;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman; [...]
+    <rect
+       width="70.866096"
+       height="7.0866098"
+       x="268.57001"
+       y="136.66589"
+       id="rect6130"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#ffffff;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman;-inkscape-font-specification:'Tim [...]
+    <rect
+       width="70.866096"
+       height="7.0866098"
+       x="268.57001"
+       y="172.099"
+       id="rect6972"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#84b818;fill-opacity:1;fill-rule:nonzero;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman; [...]
+    <rect
+       width="70.866096"
+       height="7.0866199"
+       x="268.57001"
+       y="207.532"
+       id="rect7032"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#b3b3b3;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman;-inkscape-font-specification:'Times New Roman, Bold'" />
+    <text
+       x="353.60956"
+       y="214.61865"
+       id="text6978"
+       xml:space="preserve"
+       style="font-size:18px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Lato;-inkscape-font-specification:Lato"><tspan
+         x="353.60956"
+         y="214.61865"
+         id="tspan6980">Removed sequence</tspan></text>
+    <text
+       x="353.60956"
+       y="179.18559"
+       id="text6982"
+       xml:space="preserve"
+       style="font-size:18px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Lato;-inkscape-font-specification:Lato"><tspan
+         x="353.60956"
+         y="179.18559"
+         id="tspan6984">Adapter</tspan></text>
+    <text
+       x="353.60956"
+       y="143.75253"
+       id="text6986"
+       xml:space="preserve"
+       style="font-size:18px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Lato;-inkscape-font-specification:Lato"><tspan
+         x="353.60956"
+         y="143.75253"
+         id="tspan6988">Read </tspan></text>
+    <rect
+       width="70.866096"
+       height="7.0866098"
+       x="4.9323802"
+       y="193.35901"
+       id="rect5587"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#84b818;fill-opacity:1;fill-rule:nonzero;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman; [...]
+    <rect
+       width="70.866096"
+       height="7.0866098"
+       x="4.9324002"
+       y="207.532"
+       id="rect7030"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#b3b3b3;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman;-inkscape-font-specification:'Times New Roman, Bold'" />
+    <rect
+       width="141.73199"
+       height="7.0866098"
+       x="4.9324002"
+       y="207.532"
+       id="rect6976"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman;-inkscape-font-specification:'Times  [...]
+    <rect
+       width="99.712601"
+       height="7.0866199"
+       x="82.885101"
+       y="-109.413"
+       id="rect7028"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#b3b3b3;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman;-inkscape-font-specification:'Times New Roman, Bold'" />
+    <rect
+       width="77.952797"
+       height="7.0866199"
+       x="4.9324002"
+       y="-109.413"
+       id="rect3627"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman;-inkscape-font-specification:'Times New Roman, Bold'" />
+    <rect
+       width="177.16499"
+       height="7.0866098"
+       x="5.4323401"
+       y="-109.413"
+       id="rect7199"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman;-inkscape-font-specification:'Times  [...]
+    <rect
+       width="70.866096"
+       height="7.0865698"
+       x="4.9323702"
+       y="24.8864"
+       id="rect6128"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#84b818;fill-opacity:1;fill-rule:nonzero;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman; [...]
+    <rect
+       width="70.866096"
+       height="7.0865698"
+       x="4.9323902"
+       y="81.5793"
+       id="rect6114"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#84b818;fill-opacity:1;fill-rule:nonzero;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman; [...]
+    <rect
+       width="70.866203"
+       height="7.0866299"
+       x="4.9323702"
+       y="39.059551"
+       id="rect7058"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#b3b3b3;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman;-inkscape-font-specification:'Times New Roman, Bold'" />
+    <rect
+       width="70.366096"
+       height="7.0866299"
+       x="-146.16499"
+       y="39.059551"
+       transform="scale(-1,1)"
+       id="rect3629"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman;-inkscape-font-specification:'Times New Roman, Bold'" />
+    <rect
+       width="141.73199"
+       height="7.0866098"
+       x="4.9323702"
+       y="39.059551"
+       id="rect6132"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman;-inkscape-font-specification:'Times  [...]
+    <rect
+       width="35.433102"
+       height="7.0866199"
+       x="40.365501"
+       y="95.752502"
+       id="rect7056"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#b3b3b3;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman;-inkscape-font-specification:'Times New Roman, Bold'" />
+    <rect
+       width="70.866203"
+       height="7.0866199"
+       x="75.2985"
+       y="95.752502"
+       id="rect3631"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:#ffffff;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:3;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman;-inkscape-font-specification:'Times New Roman, Bold'" />
+    <rect
+       width="106.299"
+       height="7.0866098"
+       x="40.365501"
+       y="95.752502"
+       id="rect6134"
+       style="font-size:12px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:125%;writing-mode:lr-tb;text-anchor:start;fill:none;stroke:#000000;stroke-width:1;stroke-linecap:butt;stroke-linejoin:round;stroke-miterlimit:4;stroke-opacity:1;stroke-dasharray:none;stroke-dashoffset:0;marker:none;visibility:visible;display:inline;overflow:visible;enable-background:accumulate;font-family:Times New Roman;-inkscape-font-specification:'Times  [...]
+    <text
+       x="4.9323802"
+       y="10.713129"
+       id="text3333"
+       xml:space="preserve"
+       style="font-size:18px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Lato;-inkscape-font-specification:Lato"><tspan
+         x="4.9323802"
+         y="10.713129"
+         id="tspan3335">5' Adapter</tspan></text>
+    <text
+       x="4.9323802"
+       y="-130.6727"
+       id="text3337"
+       xml:space="preserve"
+       style="font-size:18px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Lato;-inkscape-font-specification:Lato"><tspan
+         x="4.9323802"
+         y="-130.6727"
+         id="tspan3339">3' Adapter</tspan></text>
+    <text
+       x="4.9323802"
+       y="179.18558"
+       id="text3341"
+       xml:space="preserve"
+       style="font-size:18px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Lato;-inkscape-font-specification:Lato"><tspan
+         x="4.9323802"
+         y="179.18558"
+         id="tspan3343">Anchored 5' adapter</tspan></text>
+    <text
+       x="40.865387"
+       y="-81.066414"
+       id="text3349"
+       xml:space="preserve"
+       style="font-size:13.63599968px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Lato;-inkscape-font-specification:Lato"><tspan
+         x="40.865387"
+         y="-81.066414"
+         id="tspan3351">or</tspan></text>
+    <text
+       x="40.365467"
+       y="67.405998"
+       id="text3353"
+       xml:space="preserve"
+       style="font-size:13.63599968px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:125%;letter-spacing:0px;word-spacing:0px;writing-mode:lr-tb;text-anchor:start;fill:#000000;fill-opacity:1;stroke:none;font-family:Lato;-inkscape-font-specification:Lato"><tspan
+         x="40.365467"
+         y="67.405998"
+         id="tspan3355">or</tspan></text>
+  </g>
+</svg>
diff --git a/doc/_static/logo.svg b/doc/_static/logo.svg
new file mode 100644
index 0000000..24a06b4
--- /dev/null
+++ b/doc/_static/logo.svg
@@ -0,0 +1,94 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!-- Created with Inkscape (http://www.inkscape.org/) -->
+
+<svg
+   xmlns:dc="http://purl.org/dc/elements/1.1/"
+   xmlns:cc="http://creativecommons.org/ns#"
+   xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
+   xmlns:svg="http://www.w3.org/2000/svg"
+   xmlns="http://www.w3.org/2000/svg"
+   xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
+   xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
+   width="56.122009"
+   height="51.8545"
+   id="svg3076"
+   version="1.1"
+   inkscape:version="0.48.5 r10040"
+   sodipodi:docname="New document 2">
+  <defs
+     id="defs3078" />
+  <sodipodi:namedview
+     id="base"
+     pagecolor="#ffffff"
+     bordercolor="#666666"
+     borderopacity="1.0"
+     inkscape:pageopacity="0.0"
+     inkscape:pageshadow="2"
+     inkscape:zoom="2.6502665"
+     inkscape:cx="41.639266"
+     inkscape:cy="34.486602"
+     inkscape:document-units="px"
+     inkscape:current-layer="layer1"
+     showgrid="false"
+     fit-margin-top="2"
+     fit-margin-left="2"
+     fit-margin-right="2"
+     fit-margin-bottom="2"
+     inkscape:window-width="1305"
+     inkscape:window-height="763"
+     inkscape:window-x="-4"
+     inkscape:window-y="56"
+     inkscape:window-maximized="0" />
+  <metadata
+     id="metadata3081">
+    <rdf:RDF>
+      <cc:Work
+         rdf:about="">
+        <dc:format>image/svg+xml</dc:format>
+        <dc:type
+           rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
+        <dc:title></dc:title>
+      </cc:Work>
+    </rdf:RDF>
+  </metadata>
+  <g
+     inkscape:label="Layer 1"
+     inkscape:groupmode="layer"
+     id="layer1"
+     transform="translate(-346.939,-506.43493)">
+    <g
+       transform="translate(44.935994,179.79303)"
+       style="display:inline"
+       id="g4093"
+       inkscape:export-filename="cutadapt.png"
+       inkscape:export-xdpi="276"
+       inkscape:export-ydpi="276">
+      <path
+         inkscape:connector-curvature="0"
+         id="path4068"
+         transform="translate(0,308.2677)"
+         d="m 349.625,34.1875 -7.78125,3.625 c 0.91994,1.970873 1.4375,4.181472 1.4375,6.5 0,2.318528 -0.51756,4.497877 -1.4375,6.46875 l 7.78125,3.625 c 2.98923,-6.41042 2.98923,-13.808331 0,-20.21875 z"
+         style="fill:#aad400;fill-opacity:1;display:inline" />
+      <path
+         inkscape:connector-curvature="0"
+         id="path4066"
+         transform="translate(0,308.2677)"
+         d="m 328.15625,20.375 c -6.89497,-0.05633 -13.78424,2.867991 -18.5625,8.5625 -8.49469,10.123572 -7.15482,25.192814 2.96875,33.6875 10.12357,8.494686 25.19281,7.186072 33.6875,-2.9375 l -6.40625,-5.375 c -2.85492,3.3998 -7.11936,5.5625 -11.90625,5.5625 -8.59538,0 -15.5625,-6.96712 -15.5625,-15.5625 0,-8.59538 6.96712,-15.5625 15.5625,-15.5625 4.78689,0 9.05133,2.1627 11.90625,5.5625 l 6.40625,-5.375 c -0.8954,-1.067094 -1.87041,-2.073352 -2.9375,-2.96875 -4.42906,-3.716425 -9.793 [...]
+         style="fill:#217821;fill-opacity:1;display:inline" />
+      <path
+         sodipodi:nodetypes="cccccc"
+         inkscape:connector-curvature="0"
+         id="path4072"
+         transform="translate(0,308.2677)"
+         d="m 353.4375,25.09375 -15.28125,11.5 0.0497,1.108915 1.04406,0.578585 16.875,-8.96875 c -0.78525,-1.47685 -1.68088,-2.882924 -2.6875,-4.21875 z"
+         style="fill:#217821;fill-opacity:1;display:inline" />
+      <path
+         sodipodi:nodetypes="cccccc"
+         inkscape:connector-curvature="0"
+         id="path4074"
+         transform="translate(0,308.2677)"
+         d="m 339.25,50.3125 -1.04688,0.4375 -0.0469,1.25 15.28125,11.53125 c 1.00662,-1.335826 1.90224,-2.77315 2.6875,-4.25 z"
+         style="fill:#217821;fill-opacity:1;display:inline" />
+    </g>
+  </g>
+</svg>
diff --git a/doc/changes.rst b/doc/changes.rst
new file mode 100644
index 0000000..d9e113e
--- /dev/null
+++ b/doc/changes.rst
@@ -0,0 +1 @@
+.. include:: ../CHANGES.rst
diff --git a/doc/colorspace.rst b/doc/colorspace.rst
new file mode 100644
index 0000000..fc9c599
--- /dev/null
+++ b/doc/colorspace.rst
@@ -0,0 +1,128 @@
+Colorspace reads
+================
+
+Cutadapt was designed to work with colorspace reads from the ABi SOLiD
+sequencer. Colorspace trimming is activated by the ``--colorspace``
+option (or use ``-c`` for short). The input reads can be given either:
+
+-  in a FASTA file
+-  in a FASTQ file
+-  in a ``.csfasta`` and a ``.qual`` file (this is the native SOLiD
+   format).
+
+In all cases, the colors must be represented by the characters 0, 1, 2,
+3. Example input files are in the cutadapt distribution at
+``tests/data/solid.*``. The ``.csfasta``/``.qual`` file format is
+automatically assumed if two input files are given to cutadapt.
+
+In colorspace mode, the adapter sequences given to the ``-a``, ``-b``
+and ``-g`` options can be given either as colors or as nucleotides. If
+given as nucleotides, they will automatically be converted to
+colorspace. For example, to trim an adapter from ``solid.csfasta`` and
+``solid.qual``, use this command-line::
+
+    cutadapt -c -a CGCCTTGGCCGTACAGCAG solid.csfasta solid.qual > output.fastq
+
+In case you know the colorspace adapter sequence, you can also write
+``330201030313112312`` instead of ``CGCCTTGGCCGTACAGCAG`` and the result
+is the same.
+
+Ambiguity in colorspace
+-----------------------
+
+The ambiguity of colorspace encoding leads to some effects to be aware
+of when trimming 3' adapters from colorspace reads. For example, when
+trimming the adapter ``AACTC``, cutadapt searches for its
+colorspace-encoded version ``0122``. But also ``TTGAG``, ``CCAGA`` and
+``GGTCT`` have an encoding of ``0122``. This means that effectively four
+different adapter sequences are searched and trimmed at the same time.
+There is no way around this, unless the decoded sequence were available,
+but that is usually only the case after read mapping.
+
+The effect should usually be quite small. The number of false positives
+is multiplied by four, but with a sufficiently large overlap (3 or 4 is
+already enough), this is still only around 0.2 bases lost per read on
+average. If inspecting k-mer frequencies or using small overlaps, you
+need to be aware of the effect, however.
+
+
+Double-encoding, BWA and MAQ
+----------------------------
+
+The read mappers MAQ and BWA (and possibly others) need their colorspace
+input reads to be in a so-called "double encoding". This simply means
+that they cannot deal with the characters 0, 1, 2, 3 in the reads, but
+require that the letters A, C, G, T be used for colors. For example, the
+colorspace sequence ``0011321`` would be ``AACCTGC`` in double-encoded
+form. This is not the same as conversion to basespace! The read is still
+in colorspace, only letters are used instead of digits. If that sounds
+confusing, that is because it is.
+
+Note that MAQ is unmaintained and should not be used in new projects.
+
+BWA’s colorspace support was dropped in versions more recent than 0.5.9,
+but that version works well.
+
+When you want to trim reads that will be mapped with BWA or MAQ, you can
+use the ``--bwa`` option, which enables colorspace mode (``-c``),
+double-encoding (``-d``) and primer trimming (``-t``), all of which are
+required for BWA, in addition to some other useful options.
+
+The ``--maq`` option is an alias for ``--bwa``.
+
+
+Colorspace examples
+-------------------
+
+To cut an adapter from SOLiD data given in ``solid.csfasta`` and
+``solid.qual``, produce MAQ- and BWA-compatible output, allow the
+default of 10% errors and write the resulting FASTQ file to
+``output.fastq``, use::
+
+    cutadapt --bwa -a CGCCTTGGCCGTACAGCAG solid.csfasta solid.qual > output.fastq
+
+Instead of redirecting standard output with ``>``, the ``-o`` option can
+be used. This also shows that you can give the adapter in colorspace and
+how to use a different error rate::
+
+    cutadapt --bwa -e 0.15 -a 330201030313112312 -o output.fastq solid.csfasta solid.qual
+
+This does the same as above, but produces BFAST-compatible output,
+strips the ``_F3`` suffix from read names and adds the prefix ``abc:`` to
+them::
+
+    cutadapt -c -e 0.15 -a 330201030313112312 -x abc: --strip-f3 solid.csfasta solid.qual > output.fastq
+
+
+Bowtie
+------
+
+Quality values of colorspace reads are sometimes negative. Bowtie gets
+confused and prints this message::
+
+    Encountered a space parsing the quality string for read xyz
+
+BWA also has a problem with such data. Cutadapt therefore converts
+negative quality values to zero in colorspace data. Use the option
+``--no-zero-cap`` to turn this off.
+
+.. _sra-fastq:
+
+Sequence Read Archive
+---------------------
+
+The Sequence Read Archive provides files in a special "SRA" file format. When
+the ``fastq-dump`` program from the sra-toolkit package is used to convert
+these ``.sra`` files to FASTQ format, colorspace reads will get an extra
+quality value at the beginning of each read. You may get an error like this::
+
+    cutadapt: error: In read named 'xyz': length of colorspace quality
+    sequence (36) and length of read (35) do not match (primer is: 'T')
+
+To make cutadapt ignore the extra quality base, add ``--format=sra-fastq`` to
+your command-line, as in this example::
+
+    cutadapt -c --format=sra-fastq -a CGCCTTGGCCG sra.fastq > trimmed.fastq
+
+When you use ``--format=sra-fastq``, the spurious quality value will be removed
+from all reads in the file.
diff --git a/doc/conf.py b/doc/conf.py
new file mode 100644
index 0000000..bca116e
--- /dev/null
+++ b/doc/conf.py
@@ -0,0 +1,270 @@
+# -*- coding: utf-8 -*-
+#
+# cutadapt documentation build configuration file, created by
+# sphinx-quickstart on Fri Sep 12 09:11:16 2014.
+#
+# This file is execfile()d with the current directory set to its
+# containing dir.
+#
+# Note that not all possible configuration values are present in this
+# autogenerated file.
+#
+# All configuration values have a default; values that are commented out
+# serve to show the default.
+
+import sys
+import os
+
+# If extensions (or modules to document with autodoc) are in another directory,
+# add these directories to sys.path here. If the directory is relative to the
+# documentation root, use os.path.abspath to make it absolute, like shown here.
+sys.path.insert(0, os.path.abspath(os.pardir))
+
+# -- General configuration ------------------------------------------------
+
+# If your documentation needs a minimal Sphinx version, state it here.
+#needs_sphinx = '1.0'
+
+# Add any Sphinx extension module names here, as strings. They can be
+# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
+# ones.
+extensions = [
+    'sphinx.ext.autodoc',
+]
+
+# Add any paths that contain templates here, relative to this directory.
+templates_path = ['_templates']
+
+# The suffix of source filenames.
+source_suffix = '.rst'
+
+# The encoding of source files.
+#source_encoding = 'utf-8-sig'
+
+# The master toctree document.
+master_doc = 'index'
+
+# General information about the project.
+project = u'cutadapt'
+copyright = u'2010-2016, Marcel Martin'
+
+# The version info for the project you're documenting, acts as replacement for
+# |version| and |release|, also used in various other places throughout the
+# built documents.
+
+from cutadapt import __version__
+
+#
+# The short X.Y version.
+version = __version__
+# The full version, including alpha/beta/rc tags.
+release = __version__
+
+# The language for content autogenerated by Sphinx. Refer to documentation
+# for a list of supported languages.
+#language = None
+
+# There are two options for replacing |today|: either, you set today to some
+# non-false value, then it is used:
+#today = ''
+# Else, today_fmt is used as the format for a strftime call.
+#today_fmt = '%B %d, %Y'
+
+# List of patterns, relative to source directory, that match files and
+# directories to ignore when looking for source files.
+exclude_patterns = ['_build']
+
+# The reST default role (used for this markup: `text`) to use for all
+# documents.
+#default_role = None
+
+# If true, '()' will be appended to :func: etc. cross-reference text.
+#add_function_parentheses = True
+
+# If true, the current module name will be prepended to all description
+# unit titles (such as .. function::).
+#add_module_names = True
+
+# If true, sectionauthor and moduleauthor directives will be shown in the
+# output. They are ignored by default.
+#show_authors = False
+
+# The name of the Pygments (syntax highlighting) style to use.
+pygments_style = 'sphinx'
+
+# A list of ignored prefixes for module index sorting.
+#modindex_common_prefix = []
+
+# If true, keep warnings as "system message" paragraphs in the built documents.
+#keep_warnings = False
+
+
+# -- Options for HTML output ----------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+html_theme = 'default'
+try:
+	from better import better_theme_path
+	html_theme_path = [better_theme_path]
+	html_theme = 'better'
+except ImportError:
+	pass
+
+
+# Theme options are theme-specific and customize the look and feel of a theme
+# further.  For a list of options available for each theme, see the
+# documentation.
+#html_theme_options = {}
+
+# Add any paths that contain custom themes here, relative to this directory.
+#html_theme_path = []
+
+# The name for this set of Sphinx documents.  If None, it defaults to
+# "<project> v<release> documentation".
+#html_title = None
+
+# A shorter title for the navigation bar.  Default is the same as html_title.
+#html_short_title = None
+
+# The name of an image file (relative to this directory) to place at the top
+# of the sidebar.
+#html_logo = 'logo.png'
+
+# The name of an image file (within the static path) to use as favicon of the
+# docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
+# pixels large.
+#html_favicon = None
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+# Add any extra paths that contain custom files (such as robots.txt or
+# .htaccess) here, relative to this directory. These files are copied
+# directly to the root of the documentation.
+#html_extra_path = []
+
+# If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
+# using the given strftime format.
+#html_last_updated_fmt = '%b %d, %Y'
+
+# If true, SmartyPants will be used to convert quotes and dashes to
+# typographically correct entities.
+html_use_smartypants = True
+
+# Custom sidebar templates, maps document names to template names.
+#html_sidebars = {}
+
+# Additional templates that should be rendered to pages, maps page names to
+# template names.
+#html_additional_pages = {}
+
+# If false, no module index is generated.
+#html_domain_indices = True
+
+# If false, no index is generated.
+#html_use_index = True
+
+# If true, the index is split into individual pages for each letter.
+#html_split_index = False
+
+# If true, links to the reST sources are added to the pages.
+#html_show_sourcelink = True
+
+# If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
+#html_show_sphinx = True
+
+# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
+#html_show_copyright = True
+
+# If true, an OpenSearch description file will be output, and all pages will
+# contain a <link> tag referring to it.  The value of this option must be the
+# base URL from which the finished HTML is served.
+#html_use_opensearch = ''
+
+# This is the file name suffix for HTML files (e.g. ".xhtml").
+#html_file_suffix = None
+
+# Output file base name for HTML help builder.
+htmlhelp_basename = 'cutadaptdoc'
+
+
+# -- Options for LaTeX output ---------------------------------------------
+
+latex_elements = {
+# The paper size ('letterpaper' or 'a4paper').
+'papersize': 'a4paper',
+
+# The font size ('10pt', '11pt' or '12pt').
+#'pointsize': '10pt',
+
+# Additional stuff for the LaTeX preamble.
+#'preamble': '',
+}
+
+# Grouping the document tree into LaTeX files. List of tuples
+# (source start file, target name, title,
+#  author, documentclass [howto, manual, or own class]).
+latex_documents = [
+  ('index', 'cutadapt.tex', u'cutadapt Documentation',
+   u'Marcel Martin', 'manual'),
+]
+
+# The name of an image file (relative to this directory) to place at the top of
+# the title page.
+#latex_logo = None
+
+# For "manual" documents, if this is true, then toplevel headings are parts,
+# not chapters.
+#latex_use_parts = False
+
+# If true, show page references after internal links.
+#latex_show_pagerefs = False
+
+# If true, show URL addresses after external links.
+#latex_show_urls = False
+
+# Documents to append as an appendix to all manuals.
+#latex_appendices = []
+
+# If false, no module index is generated.
+#latex_domain_indices = True
+
+
+# -- Options for manual page output ---------------------------------------
+
+# One entry per manual page. List of tuples
+# (source start file, name, description, authors, manual section).
+man_pages = [
+    ('index', 'cutadapt', u'cutadapt Documentation',
+     [u'Marcel Martin'], 1)
+]
+
+# If true, show URL addresses after external links.
+#man_show_urls = False
+
+
+# -- Options for Texinfo output -------------------------------------------
+
+# Grouping the document tree into Texinfo files. List of tuples
+# (source start file, target name, title, author,
+#  dir menu entry, description, category)
+texinfo_documents = [
+  ('index', 'cutadapt', u'cutadapt Documentation',
+   u'Marcel Martin', 'cutadapt', 'One line description of project.',
+   'Miscellaneous'),
+]
+
+# Documents to append as an appendix to all manuals.
+#texinfo_appendices = []
+
+# If false, no module index is generated.
+#texinfo_domain_indices = True
+
+# How to display URL addresses: 'footnote', 'no', or 'inline'.
+#texinfo_show_urls = 'footnote'
+
+# If true, do not generate a @detailmenu in the "Top" node's menu.
+#texinfo_no_detailmenu = False
diff --git a/doc/guide.rst b/doc/guide.rst
new file mode 100644
index 0000000..d5d7fe6
--- /dev/null
+++ b/doc/guide.rst
@@ -0,0 +1,1373 @@
+==========
+User guide
+==========
+
+Basic usage
+===========
+
+If you just want to trim a 3' adapter, the basic command-line for cutadapt is::
+
+    cutadapt -a AACCGGTT -o output.fastq input.fastq
+
+The sequence of the adapter is given with the ``-a`` option. Of course, you
+need to replace ``AACCGGTT`` with your actual adapter sequence. Reads are read
+from the input file ``input.fastq`` and written to the output file
+``output.fastq``.
+
+Cutadapt searches for the adapter in all reads and removes it when it finds it.
+All reads that were present in the input file will also be present in the output
+file, some of them trimmed, some of them not. Even reads that were trimmed
+entirely (because the adapter was found in the very beginning) are output. All
+of this can be changed with command-line options, explained further down.
+
+A report is printed after cutadapt has finished processing the reads.
+
+
+Input and output file formats
+-----------------------------
+
+Input files for cutadapt need to be in one of these formats:
+
+* FASTA (file name extensions: ``.fasta``, ``.fa``, ``.fna``, ``.csfasta``, ``.csfa``)
+* FASTQ (extensions: ``.fastq``, ``.fq``)
+* A pair of a FASTA file and a ``.(cs)qual`` file
+
+The last format is (or was) used for colorspace data from the SOLiD
+instruments.
+
+The input file format is recognized from the file name extension (given in
+parentheses in the list above). You can also explicitly specify which format
+the input has by using the ``--format`` option.
+
+The output format is the same as the input format, except for the FASTA/QUAL
+pairs -- those will always be converted to FASTQ. Also, cutadapt does not check
+the output file name: If you input FASTQ data, but use ``-o output.fasta``, then
+the output file will actually be in FASTQ format.
+
+
+Compressed files
+----------------
+
+Cutadapt supports compressed input and output files. Whether an input file
+needs to be decompressed or an output file needs to be compressed is detected
+automatically by inspecting the file name: If it ends in ``.gz``, then gzip
+compression is assumed. You can therefore run cutadapt like this and it works
+as expected::
+
+    cutadapt -a AACCGGTT -o output.fastq.gz input.fastq.gz
+
+All of cutadapt's options that expect a file name support this.
+
+Files compressed with bzip2 (``.bz2``) or xz (``.xz``) are also supported, but
+only if the Python installation includes the proper modules. xz files require
+Python 3.3 or later.
+
+
+Standard input and output
+-------------------------
+
+If no output file is specified via the ``-o`` option, then the output is sent to
+the standard output stream. Instead of the example command line from above, you
+can therefore also write::
+
+    cutadapt -a AACCGGTT input.fastq > output.fastq
+
+There is one difference in behavior if you use cutadapt without ``-o``: The
+report is sent to the standard error stream instead of standard output. You
+can redirect it to a file like this::
+
+    cutadapt -a AACCGGTT input.fastq > output.fastq 2> report.txt
+
+Wherever cutadapt expects a file name, you can also write a dash (``-``) in
+order to specify that standard input or output should be used. For example::
+
+    tail -n 4 input.fastq | cutadapt -a AACCGGTT - > output.fastq
+
+The ``tail -n 4`` prints out only the last four lines of ``input.fastq``, which
+are then piped into cutadapt. Thus, cutadapt will work only on the last read in
+the input file.
+
+In most cases, you should probably use ``-`` at most once for an input file and
+at most once for an output file, in order not to get mixed output.
+
+You cannot combine ``-`` and gzip compression since cutadapt needs to know the
+file name of the output or input file. If you want to have a gzip-compressed
+output file, use ``-o`` with an explicit name.
+
+One last "trick" is to use ``/dev/null`` as an output file name. This special
+file discards everything you send into it. If you only want to see the
+statistics output, for example, and do not care about the trimmed reads at all,
+you could use something like this::
+
+    cutadapt -a AACCGGTT -o /dev/null input.fastq
+
+
+Read processing
+===============
+
+Cutadapt can do a lot more in addition to removing adapters. There are various
+command-line options that make it possible to modify and filter reads and to
+redirect them to various output files. Each read is processed in the following
+way:
+
+1. :ref:`Read modification options <modifying-reads>` are applied. This includes
+   :ref:`adapter removal <removing-adapters>`,
+   :ref:`quality trimming <quality-trimming>`, read name modifications etc.
+2. :ref:`Filtering options <filtering>` are applied, such as removal of too
+   short or untrimmed reads. Some of the filters also allow redirecting a read
+   to a separate output file.
+3. If the read has passed all the filters, it is written to the output file.
+
+
+.. _removing-adapters:
+
+Removing adapters
+=================
+
+Cutadapt supports trimming of multiple types of adapters:
+
+=================================================== ===========================
+Adapter type                                        Command-line option
+=================================================== ===========================
+:ref:`3' adapter <three-prime-adapters>`            ``-a ADAPTER``
+:ref:`5' adapter <five-prime-adapters>`             ``-g ADAPTER``
+:ref:`Anchored 3' adapter <anchored-3adapters>`     ``-a ADAPTER$``
+:ref:`Anchored 5' adapter <anchored-5adapters>`     ``-g ^ADAPTER``
+:ref:`5' or 3' (both possible) <anywhere-adapters>` ``-b ADAPTER``
+:ref:`Linked adapter <linked-adapters>`              ``-a ADAPTER1...ADAPTER2``
+=================================================== ===========================
+
+Here is an illustration of the allowed adapter locations relative to the read
+and depending on the adapter type:
+
+|
+
+.. image:: _static/adapters.svg
+
+|
+
+By default, all adapters :ref:`are searched error-tolerantly <error-tolerance>`.
+Adapter sequences :ref:`may also contain the "N" wildcard
+character <wildcards>`.
+
+In addition, it is possible to :ref:`remove a fixed number of
+bases <cut-bases>` from the beginning or end of each read, and to :ref:`remove
+low-quality bases (quality trimming) <quality-trimming>` from the 3' and 5' ends.
+
+
+.. _three-prime-adapters:
+
+3' adapters
+-----------
+
+A 3' adapter is a piece of DNA ligated to the 3' end of the DNA fragment you
+are interested in. The sequencer starts the sequencing process at the 5' end of
+the fragment and sequences into the adapter if the read is long enough.
+The read that it outputs will then have a part of the adapter at the
+end. Or, if the adapter was short and the read length quite long, then the
+adapter will be somewhere within the read (followed by other bases).
+
+For example, assume your fragment of interest is *MYSEQUENCE* and the adapter is
+*ADAPTER*. Depending on the read length, you will get reads that look like this::
+
+    MYSEQUEN
+    MYSEQUENCEADAP
+    MYSEQUENCEADAPTER
+    MYSEQUENCEADAPTERSOMETHINGELSE
+
+Use cutadapt's ``-a ADAPTER`` option to remove this type of adapter. This will
+be the result::
+
+    MYSEQUEN
+    MYSEQUENCE
+    MYSEQUENCE
+    MYSEQUENCE
+
+As can be seen, cutadapt correctly deals with partial adapter matches, and also
+with any trailing sequences after the adapter. Cutadapt deals with 3' adapters
+by removing the adapter itself and any sequence that may follow. If the sequence
+starts with an adapter, like this::
+
+    ADAPTERSOMETHING
+
+Then the sequence will be empty after trimming. By default, empty reads are kept
+and will appear in the output.
+
+
+.. _five-prime-adapters:
+
+5' adapters
+-----------
+
+.. note::
+    Unless your adapter may also occur in a degraded form, you probably
+    want to use an anchored 5' adapter, described in the next section.
+
+A 5' adapter is a piece of DNA ligated to the 5' end of the DNA fragment of
+interest. The adapter sequence is expected to appear at the start of the read,
+but may be partially degraded. The sequence may also appear somewhere within
+the read. In all cases, the adapter itself and the sequence preceding it are
+removed.
+
+Again, assume your fragment of interest is *MYSEQUENCE* and the adapter is
+*ADAPTER*. The reads may look like this::
+
+    ADAPTERMYSEQUENCE
+    DAPTERMYSEQUENCE
+    TERMYSEQUENCE
+    SOMETHINGADAPTERMYSEQUENCE
+
+All the above sequences are trimmed to ``MYSEQUENCE`` when you use ``-g ADAPTER``.
+As with 3' adapters, the resulting read may have a length of zero when the
+sequence ends with the adapter. For example, the read ::
+
+    SOMETHINGADAPTER
+
+will be empty after trimming.
+
+
+.. _anchored-5adapters:
+
+Anchored 5' adapters
+--------------------
+
+In many cases, the above behavior is not really what you want for trimming 5'
+adapters. You may know, for example, that degradation does not occur and that
+the adapter is also not expected to be within the read. Thus, you always expect
+the read to look like the first example from above::
+
+    ADAPTERSOMETHING
+
+If you want to trim only this type of adapter, use ``-g ^ADAPTER``. The ``^`` is
+supposed to indicate that the adapter is "anchored" at the beginning of the read.
+In other words: The adapter is expected to be a prefix of the read. Note that
+cases like these are also recognized::
+
+    ADAPTER
+    ADAPT
+    ADA
+
+The read will simply be empty after trimming.
+
+Be aware that cutadapt still searches for adapters error-tolerantly and, in
+particular, allows insertions. So if your maximum error rate is sufficiently
+high, even this read will be trimmed::
+
+    BADAPTERSOMETHING
+
+The ``B`` in the beginning is seen as an insertion. If you also want to prevent
+this from happening, use the option ``--no-indels`` to disallow insertions and
+deletions entirely.
+
+
+.. _anchored-3adapters:
+
+Anchored 3' adapters
+--------------------
+
+It is also possible to anchor 3' adapters to the end of the read. This is
+rarely necessary, but if you have merged, for example, overlapping paired-end
+reads, then it is useful. Add the ``$`` character to the end of an
+adapter sequence specified via ``-a`` in order to anchor the adapter to the
+end of the read, such as ``-a ADAPTER$``. The adapter will only be found if it
+is a *suffix* of the read, but errors are still allowed as for 5' adapters.
+You can disable insertions and deletions with ``--no-indels``.
+
+Anchored 3' adapters work as if you had reversed the sequence and used an
+appropriate anchored 5' adapter.
+
+As an example, assume you have these reads::
+
+    MYSEQUENCEADAP
+    MYSEQUENCEADAPTER
+    MYSEQUENCEADAPTERSOMETHINGELSE
+
+Using ``-a ADAPTER$`` will result in::
+
+    MYSEQUENCEADAP
+    MYSEQUENCE
+    MYSEQUENCEADAPTERSOMETHINGELSE
+
+Only the middle read is trimmed at all.
+
+
+.. _linked-adapters:
+
+Linked adapters
+---------------
+
+This is a combination of a 5' and a 3' adapter. Use ``-a ADAPTER1...ADAPTER2``
+to search for a linked adapter. ADAPTER1 is interpreted as an anchored 5'
+adapter, which is searched for first. ADAPTER2, which is a regular 3' adapter,
+is searched for only if ADAPTER1 is found.
+
+This feature is experimental and will probably break when used in combination
+with some other options, such as ``--info-file``, ``--mask-adapter``.
+
+
+.. _anywhere-adapters:
+
+5' or 3' adapters
+-----------------
+
+The last type of adapter is a combination of the 5' and 3' adapter. You can use
+it when your adapter is ligated to the 5' end for some reads and to the 3' end
+in other reads. This probably does not happen very often, and this adapter type
+was in fact originally implemented because the library preparation in an
+experiment did not work as it was supposed to.
+
+For this type of adapter, the sequence is specified with ``-b ADAPTER`` (or use
+the longer spelling ``--anywhere ADAPTER``). The adapter may appear in the
+beginning (even degraded), within the read, or at the end of the read (even
+partially). The decision which part of the read to remove is made as follows: If
+there is at least one base before the found adapter, then the adapter is
+considered to be a 3' adapter and the adapter itself and everything
+following it is removed. Otherwise, the adapter is considered to be a 5'
+adapter and it is removed from the read, but the sequence after it remains.
+
+Here are some examples.
+
+============================== =================== =====================
+Read before trimming           Read after trimming Detected adapter type
+============================== =================== =====================
+``MYSEQUENCEADAPTERSOMETHING`` ``MYSEQUENCE``      3' adapter
+``MYSEQUENCEADAPTER``          ``MYSEQUENCE``      3' adapter
+``MYSEQUENCEADAP``             ``MYSEQUENCE``      3' adapter
+``MADAPTER``                   ``M``               3' adapter
+``ADAPTERMYSEQUENCE``          ``MYSEQUENCE``      5' adapter
+``PTERMYSEQUENCE``             ``MYSEQUENCE``      5' adapter
+``TERMYSEQUENCE``              ``MYSEQUENCE``      5' adapter
+============================== =================== =====================
+
+The ``-b`` option cannot be used with colorspace data.
+
+
+.. _error-tolerance:
+
+Error tolerance
+---------------
+
+All searches for adapter sequences are error tolerant. Allowed errors are
+mismatches, insertions and deletions. For example, if you search for the
+adapter sequence ``ADAPTER`` and the error tolerance is set appropriately
+(as explained below), then also ``ADABTER`` will be found (with 1 mismatch),
+as well as ``ADAPTR`` (with 1 deletion), and also ``ADAPPTER`` (with 1
+insertion).
+
+The level of error tolerance is adjusted by specifying a *maximum error rate*,
+which is 0.1 (=10%) by default. Use the ``-e`` option to set a different value.
+To determine the number of allowed errors, the maximum error rate is multiplied
+by the length of the match (and then rounded down to an integer).
+
+What does that mean?
+Assume you have a long adapter ``LONGADAPTER`` and it appears in full somewhere
+within the read. The length of the match is 11 characters since the full adapter
+has a length of 11, therefore 11·0.1=1.1 errors are allowed with the default
+maximum error rate of 0.1. This is rounded off to 1 allowed error. So the
+adapter will be found within this read::
+
+    SEQUENCELONGADUPTERSOMETHING
+
+If the match is a bit shorter, however, the result is different::
+
+    SEQUENCELONGADUPT
+
+Only 9 characters of the adapter match: ``LONGADAPT`` matches ``LONGADUPT``
+with one substitution. Therefore, only 9·0.1=0.9 errors are allowed. Since this
+is rounded off to zero allowed errors, the adapter will not be found.
+
+The number of errors allowed for a given adapter match length is also shown in
+the report that cutadapt prints::
+
+    Sequence: 'LONGADAPTER'; Length: 11; Trimmed: 2 times.
+
+    No. of allowed errors:
+    0-9 bp: 0; 10-11 bp: 1
+
+This tells us what we now already know: For match lengths of 0-9 bases, zero
+errors are allowed and for matches of length 10-11 bases, one error is allowed.
+
+The reason for this behavior is to ensure that short matches are not favored
+unfairly. For example, assume the adapter has 40 bases and the maximum error
+rate is 0.1, which means that four errors are allowed for full-length matches.
+If four errors were allowed even for a short match such as one with 10 bases, this would
+mean that the error rate for such a case is 40%, which is clearly not what was
+desired.
+
+Insertions and deletions can be disallowed by using the option
+``--no-indels``.
+
+See also the :ref:`section on details of the alignment algorithm <algorithm>`.
+
+
+Multiple adapter occurrences within a single read
+-------------------------------------------------
+
+If a single read contains multiple copies of the same adapter, the basic rule is
+that the leftmost match is used for both 5' and 3' adapters. For example, when
+searching for a 3' adapter in ::
+
+    cccccADAPTERgggggADAPTERttttt
+
+the read will be trimmed to ::
+
+    ccccc
+
+When the adapter is a 5' adapter instead, the read will be trimmed to ::
+
+    gggggADAPTERttttt
+
+The above applies when both occurrences of the adapter are *exact* matches, and
+it also applies when both occurrences of the adapter are *inexact* matches (that
+is, each has at least one indel or mismatch). However, if one match is exact, but
+the other is inexact, then the exact match wins, even if it is not the leftmost
+one! The reason for this behavior is that cutadapt searches for exact matches
+first and, to improve performance, skips the error-tolerant matching step if an
+exact match was found.
+
+
+Reducing random matches
+-----------------------
+
+Since cutadapt allows partial matches between the read and the adapter sequence,
+short matches can occur by chance, leading to erroneously trimmed bases. For
+example, roughly 25% of all reads end with a base that is identical to the
+first base of the adapter. To reduce the number of falsely trimmed bases,
+the alignment algorithm requires that at least *three bases* match between
+adapter and read. The minimum overlap length can be changed with the parameter
+``--overlap`` (or its short version ``-O``). Shorter matches are simply
+ignored, and the bases are not trimmed.
+
+Requiring at least three bases to match is quite conservative. Even if no
+minimum overlap was required, we can compute that we lose only about 0.44 bases
+per read on average, see `Section 2.3.3 in my
+thesis <http://hdl.handle.net/2003/31824>`_. With the default minimum
+overlap length of 3, only about 0.07 bases are lost per read.
+
+When choosing an appropriate minimum overlap length, take into account that
+true adapter matches are also lost when the overlap length is higher than
+zero, reducing cutadapt's sensitivity.
+
+
+.. _wildcards:
+
+Wildcards
+---------
+
+All `IUPAC nucleotide codes <http://www.bioinformatics.org/sms/iupac.html>`_
+(wildcard characters) are supported. For example, use an ``N`` in the adapter
+sequence to match any nucleotide in the read, or use ``-a YACGT`` for an adapter
+that matches both ``CACGT`` and ``TACGT``. The wildcard character ``N`` is
+useful for trimming adapters with an embedded variable barcode::
+
+    cutadapt -a ACGTAANNNNTTAGC -o output.fastq input.fastq
+
+Wildcard characters in the adapter are enabled by default. Use the option ``-N``
+to disable this.
+
+Matching of wildcards in the reads is also possible, but disabled by default
+in order to avoid matches in reads that consist of many (often low-quality)
+``N`` bases. Use ``--match-read-wildcards`` to enable wildcards also in reads.
+
+If wildcards are disabled entirely (that is, you use ``-N`` and *do not* use
+``--match-read-wildcards``), then cutadapt compares characters by ASCII value.
+Thus, both the read and adapter can be arbitrary strings (such as ``SEQUENCE``
+or ``ADAPTER`` as used here in the examples).
+
+Wildcards do not work in colorspace.
+
+
+Repeated bases in the adapter sequence
+--------------------------------------
+
+If you have many repeated bases in the adapter sequence, such as many ``N`` or
+many ``A`` characters, you do not have to spell them out. For example, instead of writing
+ten ``A`` in a row (``AAAAAAAAAA``), write ``A{10}`` instead. The number within
+the curly braces specifies how often the character that precedes it will be
+repeated. This works also for IUPAC wildcard characters, as in ``N{5}``.
+
+It is recommended that you use quotation marks around your adapter sequence if
+you use this feature. For poly-A trimming, for example, you would write::
+
+    cutadapt -a "A{100}" -o output.fastq input.fastq
+
+
+.. _modifying-reads:
+
+Modifying reads
+===============
+
+This section describes in which ways reads can be modified other than adapter
+removal.
+
+.. _cut-bases:
+
+Removing a fixed number of bases
+--------------------------------
+
+By using the ``--cut`` option or its abbreviation ``-u``, it is possible to
+unconditionally remove bases from the beginning or end of each read. If
+the given length is positive, the bases are removed from the beginning
+of each read. If it is negative, the bases are removed from the end.
+
+For example, to remove the first five bases of each read::
+
+    cutadapt -u 5 -o trimmed.fastq reads.fastq
+
+To remove the last seven bases of each read::
+
+    cutadapt -u -7 -o trimmed.fastq reads.fastq
+
+The ``-u``/``--cut`` option can be combined with the other options, but
+the desired bases are removed *before* any adapter trimming.
+
+
+.. _quality-trimming:
+
+Quality trimming
+----------------
+
+The ``-q`` (or ``--trim-qualities``) parameter can be used to trim
+low-quality ends from reads before adapter removal. For this to work
+correctly, the quality values must be encoded as ascii(phred quality +
+33). If they are encoded as ascii(phred quality + 64), you need to add
+``--quality-base=64`` to the command line.
+
+Quality trimming can be done without adapter trimming, so this will work::
+
+    cutadapt -q 10 -o output.fastq input.fastq
+
+By default, only the 3' end of each read is quality-trimmed. If you want to
+trim the 5' end as well, use the ``-q`` option with two comma-separated cutoffs::
+
+    cutadapt -q 15,10 -o output.fastq input.fastq
+
+The 5' end will then be trimmed with a cutoff of 15, and the 3' will be trimmed
+with a cutoff of 10. If you only want to trim the 5' end, then use a cutoff of
+0 for the 3' end, as in ``-q 10,0``.
+
+
+Quality trimming algorithm
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The trimming algorithm is the same as the one used by BWA, but applied to both
+ends of the read in turn (if requested). That is: Subtract the given cutoff
+from all qualities; compute partial sums from all indices to the end of the
+sequence; cut the sequence at the index at which the sum is minimal. If both
+ends are to be trimmed, repeat this for the other end.
+
+The basic idea is to remove all bases starting from the end of the read whose
+quality is smaller than the given threshold. This is refined a bit by allowing
+some good-quality bases among the bad-quality ones. In the following example,
+we assume that the 3' end is to be quality-trimmed.
+
+Assume you use a threshold of 10 and have these quality values:
+
+42, 40, 26, 27, 8, 7, 11, 4, 2, 3
+
+Subtracting the threshold gives:
+
+32, 30, 16, 17, -2, -3, 1, -6, -8, -7
+
+Then sum up the numbers, starting from the end (partial sums). Stop early if
+the sum is greater than zero:
+
+(70), (38), 8, -8, -25, -23, -20, -21, -15, -7
+
+The numbers in parentheses are not computed (because 8 is greater than zero),
+but shown here for completeness. The position of the minimum (-25) is used as
+the trimming position. Therefore, the read is trimmed to the first four bases,
+which have quality values 42, 40, 26, 27.
+
+
+Modifying read names
+--------------------
+
+If you feel the need to modify the names of processed reads, some of the
+following options may be useful.
+
+Use ``-y`` or ``--suffix`` to append a text to read names. The given string can
+contain the placeholder ``{name}``, which will be replaced with the name of the
+adapter found in that read. For example, writing ::
+
+    cutadapt -a adapter1=ACGT -y ' we found {name}' input.fastq
+
+changes a read named ``read1`` to ``read1 we found adapter1`` if the adapter
+``ACGT`` was found. The options ``-x``/``--prefix`` work the same, but the text
+is added in front of the read name. For both options, spaces need to be
+specified explicitly, as in the above example. If no adapter was found in a
+read, the text ``no_adapter`` is inserted for ``{name}``.
+
+In order to remove a suffix of each read name, use ``--strip-suffix``.
+
+Some old 454 read files contain the length of the read in the name::
+
+    >read1 length=17
+    ACGTACGTACAAAAAAA
+
+If you want to update this to the correct length after trimming, use the option
+``--length-tag``. In this example, this would be ``--length-tag 'length='``.
+After trimming, the read would perhaps look like this::
+
+    >read1 length=10
+    ACGTACGTAC
+
+
+Read modification order
+-----------------------
+
+The read modifications described above are applied in the following order to
+each read. Steps not requested on the command-line are skipped.
+
+1. Unconditional base removal with ``--cut``
+2. Quality trimming (``-q``)
+3. Adapter trimming (``-a``, ``-b``, ``-g`` and uppercase versions)
+4. N-end trimming (``--trim-n``)
+5. Length tag modification (``--length-tag``)
+6. Read name suffix removal (``--strip-suffix``)
+7. Addition of prefix and suffix to read name (``-x``/``--prefix`` and ``-y``/``--suffix``)
+8. Double-encode the sequence (only colorspace)
+9. Replace negative quality values with zero (zero capping, only colorspace)
+10. Trim primer base (only colorspace)
+
+The last three steps are colorspace-specific.
+
+
+.. _filtering:
+
+Filtering reads
+===============
+
+By default, all processed reads, no matter whether they were trimmed or not,
+are written to the output file specified by the ``-o`` option (or to standard
+output if ``-o`` was not provided). For paired-end reads, the second read in a
+pair is always written to the file specified by the ``-p`` option.
+
+The options described here make it possible to filter reads by either discarding
+them entirely or by redirecting them to other files. When redirecting reads,
+the basic rule is that *each read is written to at most one file*. You cannot
+write reads to more than one output file.
+
+In the following, the term "processed read" refers to a read to which all
+modifications have been applied (adapter removal, quality trimming etc.). A
+processed read can be identical to the input read if no modifications were done.
+
+
+``--minimum-length N`` or ``-m N``
+    Throw away processed reads shorter than *N* bases.
+
+``--too-short-output FILE``
+    Instead of throwing away the reads that are too short according to ``-m``,
+    write them to *FILE* (in FASTA/FASTQ format).
+
+``--maximum-length N`` or ``-M N``
+    Throw away processed reads longer than *N* bases.
+
+``--too-long-output FILE``
+    Instead of throwing away the reads that are too long (according to ``-M``),
+    write them to *FILE* (in FASTA/FASTQ format).
+
+``--untrimmed-output FILE``
+    Write all reads without adapters to *FILE* (in FASTA/FASTQ format) instead
+    of writing them to the regular output file.
+
+``--discard-trimmed``
+   Throw away reads in which an adapter was found.
+
+``--discard-untrimmed``
+   Throw away reads in which *no* adapter was found. This has the same effect as
+   specifying ``--untrimmed-output /dev/null``.
+
+The options ``--too-short-output`` and ``--too-long-output`` are applied first.
+This means, for example, that a read that is too long will never end up in the
+``--untrimmed-output`` file when ``--too-long-output`` was given, no matter
+whether it was trimmed or not.
+
+The options ``--untrimmed-output``, ``--discard-trimmed`` and ``--discard-untrimmed``
+are mutually exclusive.
+
+
+.. _paired-end:
+
+Trimming paired-end reads
+=========================
+
+Cutadapt supports trimming of paired-end reads, trimming both reads in a pair
+at the same time.
+
+Assume the input is in ``reads.1.fastq`` and ``reads.2.fastq`` and that
+``ADAPTER_FWD`` should be trimmed from the forward reads (first file)
+and ``ADAPTER_REV`` from the reverse reads (second file).
+
+The basic command-line is::
+
+    cutadapt -a ADAPTER_FWD -A ADAPTER_REV -o out.1.fastq -p out.2.fastq reads.1.fastq reads.2.fastq
+
+``-p`` is the short form of ``--paired-output``. The option ``-A`` is used here
+to specify an adapter sequence that cutadapt
+should remove from the second read in each pair. There are also the options
+``-G``, ``-B``. All of them work just like their lowercase counterparts,
+except that the adapter is searched for in the second read in each paired-end
+read. There is also option ``-U``, which you can use to remove a fixed number
+of bases from the second read in a pair.
+
+While it is possible to run cutadapt on the two files separately, processing
+both files at the same time is highly recommended since the program can check
+for problems in your input files only when they are processed together.
+
+When you use ``-p``/``--paired-output``, cutadapt checks whether the files are
+properly paired. An error is raised if one of the files contains more reads than
+the other or if the read names in the two files do not match. Only the part of
+the read name before the first space is considered. If the read name ends with
+``/1`` or ``/2``, then that is also ignored. For example, two FASTQ headers that
+would be considered to denote properly paired reads are::
+
+    @my_read/1 a comment
+
+and::
+
+    @my_read/2 another comment
+
+This is an example for *improperly paired* read names::
+
+    @my_read/1;1
+
+and::
+
+    @my_read/2;1
+
+Since the ``/1`` and ``/2`` are ignored only if they occur at the end of the read
+name, and since the ``;1`` is considered to be part of the read name, these
+reads will not be considered to be properly paired.
+
+As soon as you start to use one of the filtering options that discard reads, it
+is mandatory you process both files at the same time to make sure that the
+output files are kept synchronized: If a read is removed from one of the files,
+cutadapt will ensure it is also removed from the other file.
+
+
+The following command-line options are applied to *both* reads:
+
+* ``-q`` (along with ``--quality-base``)
+* ``--times`` applies to all the adapters given
+* ``--no-trim``
+* ``--trim-n``
+* ``--mask``
+* ``--length-tag``
+* ``--prefix``, ``--suffix``
+* ``--strip-f3``
+* ``--colorspace``, ``--bwa``, ``-z``, ``--no-zero-cap``, ``--double-encode``,
+  ``--trim-primer``
+
+The following limitations still exist:
+
+* The ``--info-file``, ``--rest-file`` and ``--wildcard-file`` options write out
+  information only from the first read.
+* Demultiplexing is not yet supported with paired-end data.
+
+
+
+.. _filtering-paired:
+
+Filtering paired-end reads
+--------------------------
+
+The :ref:`filtering options listed above <filtering>` can also be used when
+trimming paired-end data. Since there are two reads, however, the filtering
+criteria are checked for both reads. The question is what to do when a criterion
+applies to only one read and not the other.
+
+By default, the filtering options discard or redirect the read pair if *any*
+of the two reads fulfill the criteria. That is, ``--max-n`` discards the pair
+if one of the two reads has too many ``N`` bases; ``--discard-untrimmed``
+discards the pair if one of the reads does not contain an adapter;
+``--minimum-length`` discards the pair if one of the reads is too short;
+and ``--maximum-length`` discards the pair if one of the reads is too long.
+Note that the ``--discard-trimmed`` filter would also apply because it is also
+the case that at least one of the reads is *trimmed*!
+
+To require that filtering criteria must apply to *both* reads in order for a
+read pair to be considered "filtered", use the option ``--pair-filter=both``.
+
+To further complicate matters, cutadapt switches to a backwards compatibility
+mode ("legacy mode") when none of the uppercase modification options
+(``-A``/``-B``/``-G``/``-U``) are given. In that mode, filtering criteria are
+checked only for the *first* read. Cutadapt will also tell you at the top of
+the report whether legacy mode is active. Check that line if you get strange
+results!
+
+These are the paired-end specific filtering and output options:
+
+``--paired-output FILE`` or ``-p FILE``
+    Write the second read of each processed pair to *FILE* (in FASTA/FASTQ
+    format).
+
+``--untrimmed-paired-output FILE``
+    Used together with ``--untrimmed-output``. The second read in a pair is
+    written to this file when the processed pair was *not* trimmed.
+
+``--pair-filter=(any|both)``
+    Which of the reads in a paired-end read have to match the filtering
+    criterion in order for it to be filtered.
+
+Note that the option names can be abbreviated as long as it is clear which
+option is meant (unique prefix). For example, instead of ``--untrimmed-output``
+and ``--untrimmed-paired-output``, you can write ``--untrimmed-o`` and
+``--untrimmed-p``.
+
+
+Interleaved paired-end reads
+----------------------------
+
+Paired-end reads can be read from a single FASTQ file in which the entries for
+the first and second read from each pair alternate. The first read in each pair
+comes before the second. Enable this file format by adding the ``--interleaved``
+option to the command-line. For example::
+
+    cutadapt --interleaved -q 20 -a ACGT -A TGCA -o trimmed.fastq reads.fastq
+
+The output FASTQ file will also be written interleaved. Cutadapt will detect if
+the input file is not properly interleaved by checking whether read names match
+and whether the file contains an even number of entries.
+
+When ``--interleaved`` is used, legacy mode is disabled (that is,
+read-modification options such as ``-q`` always apply to both reads).
+
+
+Legacy paired-end read trimming
+-------------------------------
+
+.. note::
+    This section describes the way paired-end trimming was done
+    in cutadapt before 1.8, where the ``-A``, ``-G``, ``-B`` options were not
+    available. It is less safe and more complicated, but you can still use it.
+
+If you do not use any of the filtering options that discard reads, such
+as ``--discard``, ``--minimum-length`` or ``--maximum-length``, you can run
+cutadapt on each file separately::
+
+    cutadapt -a ADAPTER_FWD -o trimmed.1.fastq reads1.fastq
+    cutadapt -a ADAPTER_REV -o trimmed.2.fastq reads2.fastq
+
+You can use the options that are listed under 'Additional modifications'
+in cutadapt's help output without problems. For example, if you want to
+quality-trim the first read in each pair with a threshold of 10, and the
+second read in each pair with a threshold of 15, then the commands could
+be::
+
+    cutadapt -q 10 -a ADAPTER_FWD -o trimmed.1.fastq reads1.fastq
+    cutadapt -q 15 -a ADAPTER_REV -o trimmed.2.fastq reads2.fastq
+
+If you use any of the filtering options, you must use cutadapt in the following
+way (with the ``-p`` option) to make sure that read pairs remain synchronized.
+
+First trim the forward read, writing output to temporary files (we also
+add some quality trimming)::
+
+    cutadapt -q 10 -a ADAPTER_FWD --minimum-length 20 -o tmp.1.fastq -p tmp.2.fastq reads.1.fastq reads.2.fastq
+
+Then trim the reverse read, using the temporary files as input::
+
+    cutadapt -q 15 -a ADAPTER_REV --minimum-length 20 -o trimmed.2.fastq -p trimmed.1.fastq tmp.2.fastq tmp.1.fastq
+
+Finally, remove the temporary files::
+
+    rm tmp.1.fastq tmp.2.fastq
+
+Please see the previous section for a much simpler way of trimming paired-end
+reads!
+
+In legacy paired-end mode, the read-modifying options such as ``-q`` only
+apply to the first file in each call to cutadapt (first ``reads.1.fastq``, then
+``tmp.2.fastq`` in this example). Reads in the second file are not affected by those
+options, but by the filtering options: If a read in the first file is
+discarded, then the matching read in the second file is also filtered
+and not written to the output given by ``--paired-output`` in order to
+keep both output files synchronized.
+
+
+.. _multiple-adapters:
+
+Multiple adapters
+=================
+
+It is possible to specify more than one adapter sequence by using the options
+``-a``, ``-b`` and ``-g`` more than once. Any combination is allowed, such as
+five ``-a`` adapters and two ``-g`` adapters. Each read will be searched for
+all given adapters, but **only the best matching adapter is removed**. (But it
+is possible to :ref:`trim more than one adapter from each
+read <more-than-one>`). This is what a command may look like to trim one of two
+possible 3' adapters::
+
+    cutadapt -a TGAGACACGCA -a AGGCACACAGGG -o output.fastq input.fastq
+
+The adapter sequences can also be read from a FASTA file. Instead of giving an
+explicit adapter sequence, you need to write ``file:`` followed by the name of
+the FASTA file::
+
+    cutadapt -a file:adapters.fasta -o output.fastq input.fastq
+
+All of the sequences in the file ``adapters.fasta`` will be used as 3'
+adapters. The other adapter options ``-b`` and ``-g`` also support this. Again,
+only the best matching adapter is trimmed from each read.
+
+When cutadapt has multiple adapter sequences to work with, either specified
+explicitly on the command line or via a FASTA file, it decides in the
+following way which adapter should be trimmed:
+
+* All given adapter sequences are matched to the read.
+* Adapter matches where the overlap length (see the ``-O`` parameter) is too
+  small or where the error rate is too high (``-e``) are removed from further
+  consideration.
+* Among the remaining matches, the one with the **greatest number of matching
+  bases** is chosen.
+* If there is a tie, the first adapter wins. The order of adapters is the order
+  in which they are given on the command line or in which they are found in the
+  FASTA file.
+
+If your adapter sequences are all similar and differ only by a variable barcode
+sequence, you should use a single adapter sequence instead that
+:ref:`contains wildcard characters <wildcards>`.
+
+
+.. _named-adapters:
+
+Named adapters
+--------------
+
+Cutadapt reports statistics for each adapter separately. To identify the
+adapters, they are numbered and the adapter sequence is also printed::
+
+    === Adapter 1 ===
+
+    Sequence: AACCGGTT; Length 8; Trimmed: 5 times.
+
+If you want this to look a bit nicer, you can give each adapter a name in this
+way::
+
+    cutadapt -a My_Adapter=AACCGGTT -o output.fastq input.fastq
+
+The actual adapter sequence in this example is ``AACCGGTT`` and the name
+assigned to it is ``My_Adapter``. The report will then contain this name in
+addition to the other information::
+
+    === Adapter 'My_Adapter' ===
+
+    Sequence: AACCGGTT; Length 8; Trimmed: 5 times.
+
+When adapters are read from a FASTA file, the sequence header is used as the
+adapter name.
+
+Adapter names are also used in column 8 of :ref:`info files <info-file>`.
+
+
+.. _demultiplexing:
+
+Demultiplexing
+--------------
+
+Cutadapt supports demultiplexing, which means that reads are written to different
+output files depending on which adapter was found in them. To use this, include
+the string ``{name}`` in the name of the output file and give each adapter a name.
+The path is then interpreted as a template and each trimmed read is written
+to the path in which ``{name}`` is replaced with the name of the adapter that
+was found in the read. Reads in which no adapter was found will be written to a
+file in which ``{name}`` is replaced with ``unknown``.
+
+.. note::
+    Demultiplexing is currently only supported for single-end reads. Paired-end
+    support is planned for one of the next versions.
+
+Example::
+
+    cutadapt -a one=TATA -a two=GCGC -o trimmed-{name}.fastq.gz input.fastq.gz
+
+This command will create the three files ``trimmed-one.fastq.gz``,
+``trimmed-two.fastq.gz`` and ``trimmed-unknown.fastq.gz``. You can :ref:`also
+provide adapter sequences in a FASTA file <multiple-adapters>`.
+
+In order to not trim the input files at all, but to only do demultiplexing, use
+option ``--no-trim``. And if you want to output the reads in which no
+adapters were found to a different file, use the ``--untrimmed-output``
+parameter with a file name. Here is an example that uses both parameters and
+reads the adapters from a FASTA file (note that ``--untrimmed-output`` can be
+abbreviated)::
+
+    cutadapt -a file:barcodes.fasta --no-trim --untrimmed-o untrimmed.fastq.gz -o trimmed-{name}.fastq.gz input.fastq.gz
+
+
+.. _more-than-one:
+
+Trimming more than one adapter from each read
+---------------------------------------------
+
+By default, at most one adapter sequence is removed from each read, even if
+multiple adapter sequences were provided. This can be changed by using the
+``--times`` option (or its abbreviated form ``-n``). Cutadapt will then search
+for all the given adapter sequences repeatedly, either until no adapter match
+was found or until the specified number of rounds was reached.
+
+As an example, assume you have a protocol in which a 5' adapter gets ligated
+to your DNA fragment, but it's possible that the adapter is ligated more than
+once. So your sequence could look like this::
+
+    ADAPTERADAPTERADAPTERMYSEQUENCE
+
+To be on the safe side, you assume that there are at most 5 copies of the
+adapter sequence. This command can be used to trim the reads correctly::
+
+    cutadapt -g ^ADAPTER -n 5 -o output.fastq input.fastq
+
+This feature can also be used to search for *5'/3' linked adapters*. For example,
+when the 5' adapter is *FIRST* and the 3' adapter is *SECOND*, then the read
+could look like this::
+
+    FIRSTMYSEQUENCESECOND
+
+That is, the sequence of interest is framed by the 5' and the 3' adapter. The
+following command can be used to trim such a read::
+
+    cutadapt -g ^FIRST -a SECOND -n 2 ...
+
+Support for linked adapters is currently incomplete. For example, it is not
+possible to specify that SECOND should only be trimmed when FIRST also occurs.
+`See also this feature
+request <https://code.google.com/p/cutadapt/issues/detail?id=34>`_, and
+comment on it if you would like to see this implemented.
+
+
+.. _truseq:
+
+Illumina TruSeq
+===============
+
+If you have reads containing Illumina TruSeq adapters, follow these
+steps.
+
+Single-end reads as well as the first reads of paired-end data need to be
+trimmed with ``A`` + the “TruSeq Indexed Adapter”. Use only the prefix of the
+adapter sequence that is common to all Indexed Adapter sequences::
+
+    cutadapt -a AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC -o trimmed.fastq.gz reads.fastq.gz
+
+If you have paired-end data, trim also read 2 with the reverse complement of the
+“TruSeq Universal Adapter”. The full command-line looks as follows::
+
+    cutadapt \
+		-a AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC \
+		-A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT \
+		-o trimmed.1.fastq.gz -p trimmed.2.fastq.gz \
+		reads.1.fastq.gz reads.2.fastq.gz
+
+See also the :ref:`section about paired-end adapter trimming above <paired-end>`.
+
+If you want to simplify this a bit, you can also use the common prefix
+``AGATCGGAAGAGC`` as the adapter sequence in both cases::
+
+    cutadapt \
+		-a AGATCGGAAGAGC -A AGATCGGAAGAGC \
+		-o trimmed.1.fastq.gz -p trimmed.2.fastq.gz \
+		reads.1.fastq.gz reads.2.fastq.gz
+
+The adapter sequences can be found in the document `Illumina TruSeq Adapters
+De-Mystified <http://tucf-genomics.tufts.edu/documents/protocols/TUCF_Understanding_Illumina_TruSeq_Adapters.pdf>`__.
+
+
+.. _warnbase:
+
+Warning about incomplete adapter sequences
+------------------------------------------
+
+Sometimes cutadapt’s report ends with these lines::
+
+    WARNING:
+        One or more of your adapter sequences may be incomplete.
+        Please see the detailed output above.
+
+Further up, you’ll see a message like this::
+
+    Bases preceding removed adapters:
+      A: 95.5%
+      C: 1.0%
+      G: 1.6%
+      T: 1.6%
+      none/other: 0.3%
+    WARNING:
+        The adapter is preceded by "A" extremely often.
+        The provided adapter sequence may be incomplete.
+        To fix the problem, add "A" to the beginning of the adapter sequence.
+
+This means that in 95.5% of the cases in which an adapter was removed from a
+read, the base coming *before* that was an ``A``. If your DNA fragments are
+not random, such as in amplicon sequencing, then this is to be expected and
+the warning can be ignored. If the DNA fragments are supposed to be random,
+then the message may be genuine: The adapter sequence may be incomplete and
+should include an additional ``A`` in the beginning.
+
+This warning exists because some documents list the Illumina TruSeq adapters
+as starting with ``GATCGGA...``. While that is technically correct, the
+library preparation actually results in an additional ``A`` before that
+sequence, which also needs to be removed. See the :ref:`previous
+section <truseq>` for the correct sequence.
+
+
+.. _dealing-with-ns:
+
+Dealing with ``N`` bases
+========================
+
+Cutadapt supports the following options to deal with ``N`` bases in your reads:
+
+``--max-n COUNT``
+    Discard reads containing more than *COUNT* ``N`` bases. A fractional *COUNT*
+    between 0 and 1 can also be given and will be treated as the proportion of
+    maximally allowed ``N`` bases in the read.
+
+``--trim-n``
+    Remove flanking ``N`` bases from each read. That is, a read such as this::
+
+        NNACGTACGTNNNN
+
+    Is trimmed to just ``ACGTACGT``. This option is applied *after* adapter
+    trimming. If you want to get rid of ``N`` bases before adapter removal, use
+    quality trimming: ``N`` bases typically also have a low quality value
+    associated with them.
+
+
+.. _bisulfite:
+
+Bisulfite sequencing (RRBS)
+===========================
+
+When trimming reads that come from a library prepared with the RRBS (reduced
+representation bisulfite sequencing) protocol, the last two 3' bases must be
+removed in addition to the adapter itself. This can be achieved by using not
+the adapter sequence itself, but by adding two wildcard characters to its
+beginning. If the adapter sequence is ``ADAPTER``, the command for trimming
+should be::
+
+    cutadapt -a NNADAPTER -o output.fastq input.fastq
+
+Details can be found in `Babraham bioinformatics' "Brief guide to
+RRBS" <http://www.bioinformatics.babraham.ac.uk/projects/bismark/RRBS_Guide.pdf>`_.
+A summary follows.
+
+During RRBS library preparation, DNA is digested with the restriction enzyme
+MspI, generating a two-base overhang on the 5' end (``CG``). MspI recognizes
+the sequence ``CCGG`` and cuts
+between ``C`` and ``CGG``. A double-stranded DNA fragment is cut in this way::
+
+    5'-NNNC|CGGNNN-3'
+    3'-NNNGGC|CNNN-5'
+
+The fragment between two MspI restriction sites looks like this::
+
+    5'-CGGNNN...NNNC-3'
+      3'-CNNN...NNNGGC-5'
+
+Before sequencing (or PCR) adapters can be ligated, the missing base positions
+must be filled in with GTP and CTP::
+
+    5'-ADAPTER-CGGNNN...NNNCcg-ADAPTER-3'
+    3'-ADAPTER-gcCNNN...NNNGGC-ADAPTER-5'
+
+The filled-in bases, marked in lowercase above, do not contain any original
+methylation information, and must therefore not be used for methylation calling.
+By prefixing the adapter sequence with ``NN``, the bases will be automatically
+stripped during adapter trimming.
+
+
+Cutadapt's output
+=================
+
+
+How to read the report
+----------------------
+
+After every run, cutadapt prints out per-adapter statistics. The output
+starts with something like this::
+
+    Sequence: 'ACGTACGTACGTTAGCTAGC'; Length: 20; Trimmed: 2402 times.
+
+The meaning of this should be obvious.
+
+The next piece of information is this::
+
+    No. of allowed errors:
+    0-9 bp: 0; 10-19 bp: 1; 20 bp: 2
+
+The adapter, as was shown above, has a length of 20
+characters. We are using the default error rate of 0.1. What this
+implies is shown above: Matches up to a length of 9 bp are allowed to
+have no errors. Matches of lengths 10-19 bp are allowed to have 1 error
+and matches of length 20 can have 2 errors. See also :ref:`the section about
+error-tolerant matching <error-tolerance>`.
+
+Finally, a table is output that gives more detailed information about
+the lengths of the removed sequences. The following is only an excerpt;
+some rows are left out::
+
+    Overview of removed sequences
+    length  count   expect  max.err error counts
+    3       140     156.2   0       140
+    4       57      39.1    0       57
+    5       50      9.8     0       50
+    6       35      2.4     0       35
+    ...
+    100     397     0.0     3       358 36 3
+
+The first row tells us the following: Three bases were removed in 140
+reads; randomly, one would expect this to occur 156.2 times; the maximum
+number of errors at that match length is 0 (this is actually redundant
+since we know already that no errors are allowed at lengths 0-9 bp).
+
+The last column shows the number of reads that had 0, 1, 2 ... errors.
+In the last row, for example, 358 reads matched the adapter with zero
+errors, 36 with 1 error, and 3 matched with 2 errors.
+
+The "expect" column gives only a rough estimate of the number of
+sequences that is expected to match randomly (it assumes a GC content of
+50%, for example), but it can help to estimate whether the matches that
+were found are true adapter matches or if they are due to chance. At
+length 6, for example, only 2.4 reads are expected, but 35 do match,
+which hints that most of these matches are due to actual adapters.
+
+Note that the "length" column refers to the length of the removed
+sequence. That is, the actual length of the match in the above row at
+length 100 is 20 since that is the adapter length. Assuming the read
+length is 100, the adapter was found in the beginning of 397 reads and
+therefore those reads were trimmed to a length of zero.
+
+The table may also be useful in case the given adapter sequence contains
+an error. In that case, it may look like this::
+
+    ...
+    length  count   expect  max.err error counts
+    10      53      0.0     1       51 2
+    11      45      0.0     1       42 3
+    12      51      0.0     1       48 3
+    13      39      0.0     1       0 39
+    14      40      0.0     1       0 40
+    15      36      0.0     1       0 36
+    ...
+
+We can see that no matches longer than 12 have zero errors. In this
+case, it indicates that the 13th base of the given adapter sequence is
+incorrect.
+
+
+.. _info-file:
+
+Format of the info file
+-----------------------
+
+When the ``--info-file`` command-line parameter is given, detailed
+information about the found adapters is written to the given file. The
+output is a tab-separated text file. Each line corresponds to one read
+of the input file (unless ``--times`` is used, see below). The fields are:
+
+1. Read name
+2. Number of errors
+3. 0-based start coordinate of the adapter match
+4. 0-based end coordinate of the adapter match
+5. Sequence of the read to the left of the adapter match (can be empty)
+6. Sequence of the read that was matched to the adapter
+7. Sequence of the read to the right of the adapter match (can be empty)
+8. Name of the found adapter.
+9. Quality values corresponding to sequence left of the adapter match (can be empty)
+10. Quality values corresponding to sequence matched to the adapter (can be empty)
+11. Quality values corresponding to sequence to the right of the adapter match (can be empty)
+
+The concatenation of the fields 5-7 yields the full read sequence. Column 8 identifies
+the found adapter. :ref:`The section about named adapters <named-adapters>` describes
+how to give a name to an adapter. Adapters without a name are numbered starting
+from 1. Fields 9-11 are empty if quality values are not available.
+Concatenating them yields the full sequence of quality values.
+
+If no adapter was found, the format is as follows:
+
+1. Read name
+2. The value -1
+3. The read sequence
+4. Quality values
+
+When parsing the file, be aware that additional columns may be added in
+the future. Note also that some fields can be empty, resulting in
+consecutive tabs within a line.
+
+If the ``--times`` option is used and greater than 1, each read can appear
+more than once in the info file. There will be one line for each found adapter,
+all with identical read names. Only for the first of those lines will the
+concatenation of columns 5-7 be identical to the original read sequence (and
+accordingly for columns 9-11). For subsequent lines, the shown sequences are the
+ones that were used in subsequent rounds of adapter trimming, that is, they get
+successively shorter.
+
+Columns 9-11 have been added in cutadapt version 1.9.
+
+
+.. _algorithm:
+
+The alignment algorithm
+=======================
+
+Since the publication of the `EMBnet journal application note about
+cutadapt <http://dx.doi.org/10.14806/ej.17.1.200>`_, the alignment algorithm
+used for finding adapters has changed significantly. An overview of this new
+algorithm is given in this section. An even more detailed description is
+available in Chapter 2 of my PhD thesis `Algorithms and tools for the analysis
+of high-throughput DNA sequencing data <http://hdl.handle.net/2003/31824>`_.
+
+The algorithm is based on *semiglobal alignment*, also called *free-shift*,
+*ends-free* or *overlap* alignment. In a regular (global) alignment, the
+two sequences are compared from end to end and all differences occurring over
+that length are counted. In semiglobal alignment, the sequences are allowed to
+freely shift relative to each other and differences are only penalized in the
+overlapping region between them::
+
+      FANTASTIC
+   ELEFANT
+
+The prefix ``ELE`` and the suffix ``ASTIC`` do not have a counterpart in the
+respective other row, but this is not counted as an error. The overlap ``FANT``
+has a length of four characters.
+
+Traditionally, *alignment scores* are used to find an optimal overlap alignment:
+This means that the scoring function assigns a positive value to matches,
+while mismatches, insertions and deletions get negative values. The optimal
+alignment is then the one that has the maximal total score. Usage of scores
+has the disadvantage that they are not at all intuitive: What does a total score
+of *x* mean? Is that good or bad? How should a threshold be chosen in order to
+avoid finding alignments with too many errors?
+
+For cutadapt, the adapter alignment algorithm uses *unit costs* instead.
+This means that mismatches, insertions and deletions are counted as one error, which
+is easier to understand and allows to specify a single parameter for the
+algorithm (the maximum error rate) in order to describe how many errors are
+acceptable.
+
+There is a problem with this: When using costs instead of scores, we would like
+to minimize the total costs in order to find an optimal alignment. But then the
+best alignment would always be the one in which the two sequences do not overlap
+at all! This would be correct, but meaningless for the purpose of finding an
+adapter sequence.
+
+The optimization criteria are therefore a bit different. The basic idea is to
+consider the alignment optimal that maximizes the overlap between the two
+sequences, as long as the allowed error rate is not exceeded.
+
+Conceptually, the procedure is as follows:
+
+1. Consider all possible overlaps between the two sequences and compute an
+   alignment for each, minimizing the total number of errors in each one.
+2. Keep only those alignments that do not exceed the specified maximum error
+   rate.
+3. Then, keep only those alignments that have a maximal number of matches
+   (that is, there is no alignment with more matches).
+4. If there are multiple alignments with the same number of matches, then keep
+   only those that have the smallest error rate.
+5. If there are still multiple candidates left, choose the alignment that starts
+   at the leftmost position within the read.
+
+In Step 1, the different adapter types are taken into account: Only those
+overlaps that are actually allowed by the adapter type are actually considered.
diff --git a/doc/ideas.rst b/doc/ideas.rst
new file mode 100644
index 0000000..b5fa9d7
--- /dev/null
+++ b/doc/ideas.rst
@@ -0,0 +1,103 @@
+Ideas/To Do
+===========
+
+This is a rather unsorted list of features that would be nice to have, of
+things that could be improved in the source code, and of possible algorithmic
+improvements.
+
+- show average error rate
+- In colorspace and probably also for Illumina data, gapped alignment
+  is not necessary
+- ``--progress``
+- run pylint, pychecker
+- length histogram
+- check whether input is FASTQ although -f fasta is given
+- search for adapters in the order in which they are given on the
+  command line
+- more tests for the alignment algorithm
+- deprecate ``--rest-file``
+- ``--detect`` prints out best guess which of the given adapters is the correct one
+- alignment algorithm: make a 'banded' version
+- it seems the str.find optimization isn't very helpful. In any case, it should be
+  moved into the Aligner class.
+- allow removing not the adapter itself, but the sequence before or after it
+- instead of trimming, convert adapter to lowercase
+- warn when given adapter sequence contains non-IUPAC characters
+- try multithreading again, this time use os.pipe() or 0mq
+- extensible file type detection
+- the --times setting should be an attribute of Adapter
+
+Specifying adapters
+-------------------
+
+The idea is to deprecate the ``-b``, ``-g`` and ``-u`` parameters. Only ``-a``
+is used with a special syntax for each adapter type. This makes it a bit easier
+to add new adapter types in the future.
+
+.. csv-table::
+
+    back,``-a ADAPTER``,``-a ADAPTER`` or ``-a ...ADAPTER``
+    suffix,``-a ADAPTER$``,``-a ...ADAPTER$``
+    front,``-g ADAPTER``,``-a ADAPTER...``
+    prefix,``-g ^ADAPTER``,``-a ^ADAPTER...`` (or have anchoring by default?)
+    anywhere,``-b ADAPTER``, ``-a ...ADAPTER...`` ???
+    unconditional,``-u +10``,``-a 10...`` (collides with colorspace)
+    unconditional,``-u -10``,``-a ...10$``
+    linked,(not implemented),``-a ADAPTER...ADAPTER`` or ``-a ^ADAPTER...ADAPTER``
+
+Or add only ``-a ADAPTER...`` as an alias for ``-g ^ADAPTER`` and
+``-a ...ADAPTER`` as an alias for ``-a ADAPTER``.
+
+The ``...`` would be equivalent to ``N*`` as in regular expressions.
+
+Another idea: Allow something such as ``-a ADAP$TER`` or ``-a ADAPTER$NNN``.
+This would be a way to specify less strict anchoring.
+
+Make it possible to specify that the rightmost or leftmost match should be
+picked. Default right now: Leftmost, even for -g adapters.
+
+Allow ``N{3,10}`` as in regular expressions (for a variable-length sequence).
+
+Use parentheses to specify the part of the sequence that should be kept:
+
+* ``-a (...)ADAPTER`` (default)
+* ``-a (...ADAPTER)`` (default)
+* ``-a ADAPTER(...)`` (default)
+* ``-a (ADAPTER...)`` (??)
+
+Or, specify the part that should be removed:
+
+    ``-a ...(ADAPTER...)``
+    ``-a ...ADAPTER(...)``
+    ``-a (ADAPTER)...``
+
+Model somehow all the flags that exist for semiglobal alignment. For start of the adapter:
+
+* Start of adapter can be degraded or not
+* Bases are allowed to be before adapter or not
+
+Not degraded and no bases before allowed = anchored.
+Degraded and bases before allowed = regular 5'
+
+By default, the 5' end should be anchored, the 3' end not.
+
+* ``-a ADAPTER...`` → not degraded, no bases before allowed
+* ``-a N*ADAPTER...`` → not degraded, bases before allowed
+* ``-a ADAPTER^...`` → degraded, no bases before allowed
+* ``-a N*ADAPTER^...`` → degraded, bases before allowed
+* ``-a ...ADAPTER`` → degraded, bases after allowed
+* ``-a ...ADAPTER$`` → not degraded, no bases after allowed
+
+
+
+Paired-end trimming
+-------------------
+
+* Could also use a paired-end read merger, then remove adapters with -a and -g
+
+Available/used letters for command-line options
+-----------------------------------------------
+
+* Remaining characters: All uppercase letters except A, B, G, M, N, O, U
+* Lowercase letters: i, j, k, l, s, w
+* Planned/reserved: Q (paired-end quality trimming), j (multithreading)
diff --git a/doc/index.rst b/doc/index.rst
new file mode 100644
index 0000000..f42e58f
--- /dev/null
+++ b/doc/index.rst
@@ -0,0 +1,25 @@
+.. include:: ../README.rst
+
+=================
+Table of contents
+=================
+
+.. toctree::
+   :maxdepth: 2
+
+   installation
+   guide
+   colorspace
+   recipes
+   ideas
+   changes
+
+
+..
+   Indices and tables
+   ==================
+   
+   * :ref:`genindex`
+   * :ref:`modindex`
+   * :ref:`search`
+
diff --git a/doc/installation.rst b/doc/installation.rst
new file mode 100644
index 0000000..305d910
--- /dev/null
+++ b/doc/installation.rst
@@ -0,0 +1,127 @@
+============
+Installation
+============
+
+Quickstart
+----------
+
+The easiest way to install cutadapt is to use ``pip`` on the command line::
+
+    pip install --user --upgrade cutadapt
+
+This will download the software from `PyPI (the Python packaging
+index) <https://pypi.python.org/pypi/cutadapt/>`_, and
+install the cutadapt binary into ``$HOME/.local/bin``. If an old version of
+cutadapt exists on your system, the ``--upgrade`` parameter is required in order
+to install a newer version. You can then run the program like this::
+
+    ~/.local/bin/cutadapt --help
+
+If you want to avoid typing the full path, add the directory
+``$HOME/.local/bin`` to your ``$PATH`` environment variable.
+
+
+Installation with conda
+-----------------------
+
+Alternatively, cutadapt is also available as a conda package from the
+`bioconda channel <https://bioconda.github.io/>`_. If you do not have conda,
+`install miniconda <http://conda.pydata.org/miniconda.html>`_ first.
+Then install cutadapt like this::
+
+    conda install -c bioconda cutadapt
+
+If neither ``pip`` nor ``conda`` installation works, keep reading.
+
+
+Dependencies
+------------
+
+Cutadapt requires this software to be installed:
+
+* One of Python 2.6, 2.7, 3.3, 3.4 or 3.5. Python 2.7 is a bit faster than the
+  other versions.
+* A C compiler.
+
+Under Ubuntu, you may need to install the packages ``build-essential`` and
+``python-dev`` (or ``python3-dev``).
+
+
+Installation
+------------
+
+If you have already downloaded and unpacked the ``.tar.gz`` file, then
+installation is done like this (replace "python" with "python3" to
+install the Python 3 version)::
+
+    python setup.py install --user
+
+If you get an error message::
+
+    error: command 'gcc' failed with exit status 1
+
+then check the entire error message. If it says something about a missing ``Python.h``
+file, then you need to install the Python development packages. The
+appropriate package is called ``python-dev`` in Ubuntu (or ``python3-dev``
+for Python 3).
+
+
+System-wide installation (root required)
+----------------------------------------
+
+If you have root access, then you can install cutadapt system-wide by running::
+
+    sudo pip install cutadapt
+
+This installs cutadapt into ``/usr/local/bin``.
+
+If you want to upgrade from an older version, use this command instead::
+
+    sudo pip install --upgrade cutadapt
+
+
+Uninstalling
+------------
+
+Type  ::
+
+    pip uninstall cutadapt
+
+and confirm with ``y`` to remove the package. Under some circumstances, multiple
+versions may be installed at the same time. Repeat the above command until you
+get an error message in order to make sure that all versions are removed.
+
+
+Shared installation (on a cluster)
+----------------------------------
+
+If you have a larger installation and want to provide cutadapt as a module
+that can be loaded and unloaded (with the Lmod system, for example), we
+recommend that you create a virtual environment and 'pip install' cutadapt into
+it. These instructions work on our SLURM cluster that uses the Lmod system
+(replace ``1.9.1`` with the actual version you want to use)::
+
+    BASE=/software/cutadapt-1.9.1
+    virtualenv $BASE/venv
+    $BASE/venv/bin/pip install --install-option="--install-scripts=$BASE/bin" cutadapt==1.9.1
+
+The ``install-option`` part is important. It ensures that a second, separate
+``bin/`` directory is created (``/software/cutadapt-1.9.1/bin/``) that *only*
+contains the ``cutadapt`` script and nothing else. To make cutadapt available to
+the users, that directory (``$BASE/bin``) needs to be added to the ``$PATH``.
+
+Make sure you *do not* add the ``bin/`` directory within the ``venv`` directory
+to the ``$PATH``! Otherwise, a user trying to run ``python`` who also has the
+cutadapt module loaded would get the python from the virtual environment,
+which leads to confusing error messages.
+
+A simple module file for the Lmod system matching the above example could look
+like this::
+
+    conflict("cutadapt")
+    whatis("adapter trimming tool")
+    prepend_path("PATH", "/software/cutadapt-1.9.1/bin")
+
+Please note that there is no need to “activate” the virtual environment:
+Activation merely adds the ``bin/`` directory to the ``$PATH``, so the
+``prepend_path`` directive is equivalent to activating the virtual environment.
diff --git a/doc/recipes.rst b/doc/recipes.rst
new file mode 100644
index 0000000..3020be4
--- /dev/null
+++ b/doc/recipes.rst
@@ -0,0 +1,83 @@
+=======
+Recipes
+=======
+
+For some trimming applications, the pre-defined adapter types behave differently
+from what you would like to have. In this section, we show some ways in which
+cutadapt can be made to behave in the desired way.
+
+.. note:: This section is still being written.
+
+
+Forcing matches to be at the end of the read
+--------------------------------------------
+
+Use ``-a TACGGCATXXX``. The ``X`` is always counted as a mismatch and will force
+the adapter match to be at the end. This is not the same as an anchored 3'
+adapter since partial matches are still allowed.
+
+
+Removing more than one adapter
+------------------------------
+
+If you want to remove more than one adapter, let's say a 5' adapter and a 3'
+adapter, you have two options.
+
+First, you can specify both adapters and also ``--times=2`` (or the short
+version ``-n 2``). For example::
+
+	cutadapt -g ^TTAAGGCC -a TACGGACT -n 2 -o output.fastq input.fastq
+
+This instructs cutadapt to run two rounds of adapter finding and removal. That
+means that, after the first round and only when an adapter was actually found,
+another round is performed. In both rounds, all given adapters (two in this
+case) are searched and removed. The problem is that it could happen that one
+adapter is found twice (so the 3' adapter, for example, could be removed twice).
+
+The second option is to not use the ``-n`` option, but to run cutadapt twice,
+first removing one adapter and then the other. It is easiest if you use a pipe
+as in this example::
+
+	cutadapt -g ^TTAAGGCC input.fastq | cutadapt -a TACGGACT - > output.fastq
+
+
+Trimming poly-A tails
+---------------------
+
+If you want to trim a poly-A tail from the 3' end of your reads, use the 3'
+adapter type (``-a``) with an adapter sequence of many repeated ``A``
+nucleotides. Starting with version 1.8 of cutadapt, you can use the
+following notation to specify a sequence that consists of 100 ``A``::
+
+	cutadapt -a "A{100}" -o output.fastq input.fastq
+
+This also works when there are sequencing errors in the poly-A tail. So this
+read ::
+
+	TACGTACGTACGTACGAAATAAAAAAAAAAA
+
+will be trimmed to::
+
+	TACGTACGTACGTACG
+
+If for some reason you would like to use a shorter sequence of ``A``, you can
+do so: The matching algorithm always picks the leftmost match that it can find,
+so cutadapt will do the right thing even when the tail has more ``A`` than you
+used in the adapter sequence. However, sequencing errors may result in shorter
+matches than desired. For example, using ``-a "A{10}"``, the read above (where
+the ``AAAT`` is followed by eleven ``A``) would be trimmed to::
+
+	TACGTACGTACGTACGAAAT
+
+Depending on your application, perhaps a variant of ``-a A{10}N{90}`` is an
+alternative, forcing the match to be located as much to the left as possible,
+while still allowing for non-``A`` bases towards the end of the read.
+
+
+Other things (unfinished)
+-------------------------
+
+* How to detect adapters
+* Use cutadapt for quality-trimming only
+* Use it for minimum/maximum length filtering
+* Use it for conversion to FASTQ
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..1a2d235
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,148 @@
+"""
+Build cutadapt.
+
+Cython is run when
+* extensions are built (build_ext) and no PKG-INFO file is found,
+* or a source distribution is created (sdist).
+Otherwise, the pre-generated C sources are compiled as they are.
+"""
+import sys
+import os.path
+
+from distutils.core import setup, Extension
+from distutils.version import LooseVersion
+from distutils.command.sdist import sdist as _sdist
+from distutils.command.build_ext import build_ext as _build_ext
+
+MIN_CYTHON_VERSION = '0.24'  # oldest Cython release accepted by check_cython_version()
+
+if sys.version_info < (2, 6):  # refuse to run on interpreters older than Python 2.6
+	sys.stdout.write("At least Python 2.6 is required.\n")
+	sys.exit(1)
+
+
+# Set __version__ by executing the first line of cutadapt/__init__.py that starts with '__version__'
+with open(os.path.join(os.path.dirname(__file__), 'cutadapt', '__init__.py')) as f:
+	for line in f:
+		if line.startswith('__version__'):
+			exec(line)  # executes that line in this module; setup() below reads __version__
+			break
+
+
+def out_of_date(extensions):
+	"""
+	Return True if any .pyx/.py source is more than five seconds newer than its
+	generated .c/.cpp file or if that generated file is missing; otherwise False.
+	"""
+	for extension in extensions:
+		for pyx in extension.sources:
+			path, ext = os.path.splitext(pyx)
+			if ext not in ('.pyx', '.py'):
+				continue  # only .pyx/.py sources have a generated counterpart to compare
+			if extension.language == 'c++':
+				csource = path + '.cpp'
+			else:
+				csource = path + '.c'
+			# When comparing modification times, allow five seconds slack:
+			# If the installation is being run from pip, modification
+			# times are not preserved and therefore depend on the order in
+			# which files were unpacked.
+			if not os.path.exists(csource) or (
+				os.path.getmtime(pyx) > os.path.getmtime(csource) + 5):
+				return True
+	return False
+
+
+def no_cythonize(extensions, **_ignore):
+	"""
+	Change file extensions from .pyx/.py to .c (or .cpp for C++ extensions),
+	modifying each extension's ``sources`` list in place. Returns None.
+	Extra keyword arguments are accepted and ignored. Copied from Cython documentation.
+	"""
+	for extension in extensions:
+		sources = []
+		for sfile in extension.sources:
+			path, ext = os.path.splitext(sfile)
+			if ext in ('.pyx', '.py'):
+				if extension.language == 'c++':
+					ext = '.cpp'
+				else:
+					ext = '.c'
+				sfile = path + ext
+			sources.append(sfile)  # other source files are kept unchanged
+		extension.sources[:] = sources  # slice assignment: the existing list object is updated
+
+
+def check_cython_version():
+	"""Exit with status 1 if Cython cannot be imported or is older than MIN_CYTHON_VERSION"""
+	try:
+		from Cython import __version__ as cyversion
+	except ImportError:
+		sys.stdout.write(  # note: the message is written to stdout, not stderr
+			"ERROR: Cython is not installed. Install at least Cython version " +
+			str(MIN_CYTHON_VERSION) + " to continue.\n")
+		sys.exit(1)
+	if LooseVersion(cyversion) < LooseVersion(MIN_CYTHON_VERSION):  # compare as version numbers
+		sys.stdout.write(
+			"ERROR: Your Cython is at version '" + str(cyversion) +
+			"', but at least version " + str(MIN_CYTHON_VERSION) + " is required.\n")
+		sys.exit(1)
+
+
+extensions = [  # extension modules built from Cython sources; passed to setup() as ext_modules
+	Extension('cutadapt._align', sources=['cutadapt/_align.pyx']),
+	Extension('cutadapt._qualtrim', sources=['cutadapt/_qualtrim.pyx']),
+	Extension('cutadapt._seqio', sources=['cutadapt/_seqio.pyx']),
+]
+
+
+class build_ext(_build_ext):  # build_ext command that runs Cython only for a developer copy
+	def run(self):
+		# If we encounter a PKG-INFO file, then this is likely a .tar.gz/.zip
+		# file retrieved from PyPI that already includes the pre-cythonized
+		# extension modules, and then we do not need to run cythonize().
+		if os.path.exists('PKG-INFO'):  # relative path, so resolved against the current working directory
+			no_cythonize(extensions)  # rewrites the module-level extensions' sources to .c/.cpp in place
+		else:
+			# Otherwise, this is a 'developer copy' of the code, and then the
+			# only sensible thing is to require Cython to be installed.
+			check_cython_version()
+			from Cython.Build import cythonize
+			self.extensions = cythonize(self.extensions)
+		_build_ext.run(self)
+
+
+class sdist(_sdist):
+	def run(self):
+		# Make sure the compiled Cython files in the distribution are up-to-date
+		from Cython.Build import cythonize
+		check_cython_version()
+		cythonize(extensions)
+		_sdist.run(self)
+
+
+setup(
+	name = 'cutadapt',
+	version = __version__,  # defined by the exec() of the '__version__' line from cutadapt/__init__.py
+	author = 'Marcel Martin',
+	author_email = 'marcel.martin at scilifelab.se',
+	url = 'https://cutadapt.readthedocs.org/',
+	description = 'trim adapters from high-throughput sequencing reads',
+	license = 'MIT',
+	cmdclass = {'sdist': sdist, 'build_ext': build_ext},  # use the Cython-aware command subclasses
+	ext_modules = extensions,
+	packages = ['cutadapt', 'cutadapt.scripts'],
+	scripts = ['bin/cutadapt'],
+	classifiers = [
+		"Development Status :: 5 - Production/Stable",
+		"Environment :: Console",
+		"Intended Audience :: Science/Research",
+		"License :: OSI Approved :: MIT License",
+		"Natural Language :: English",
+		"Programming Language :: Cython",
+		"Programming Language :: Python :: 2.6",
+		"Programming Language :: Python :: 2.7",
+		"Programming Language :: Python :: 3",
+		"Topic :: Scientific/Engineering :: Bio-Informatics"
+	]
+)
diff --git a/tests/.gitignore b/tests/.gitignore
new file mode 100644
index 0000000..a1b3311
--- /dev/null
+++ b/tests/.gitignore
@@ -0,0 +1,3 @@
+tmp.log
+tmp.fastaq
+tmp.fastq
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/cut/454.fa b/tests/cut/454.fa
new file mode 100644
index 0000000..7d4f345
--- /dev/null
+++ b/tests/cut/454.fa
@@ -0,0 +1,118 @@
+>000163_1255_2627 length=8 uaccno=E0R4ISW01DCIQD
+GTGTGGTG
+>000652_1085_0667 length=80 uaccno=E0R4ISW01CXJXP
+ATTGAAGAGGTTGGTAAGTTTTAAGTTGGTAGGTGGTTGGGGAGTGGTTGGAGAGGAGTTGTTGGGAGTTTGTGTCCTGC
+>000653_1285_1649 length=92 uaccno=E0R4ISW01DE4SJ
+AATTAGTCGAGCGTTGTGGTGGGTATTTGTAATTTTAGCTACTCTGAAGGCTGAGGCAGGAGAACTGCTTGAACCCGGGAGGCGGAGGTTGC
+>000902_0715_2005 length=50 uaccno=E0R4ISW01B03K3
+GGGTGTTGAATTTAATATGTAGTATATTGATTTGTGATGATTATTTTGCC
+>001146_1255_0340 length=50 uaccno=E0R4ISW01DCGYU
+GGGTGTTGAATTTAATATGTAGTATATTGATTTGTGATGATTATTTTGCC
+>001210_1147_1026 length=124 uaccno=E0R4ISW01C2Z5W
+GAGGTGGTGAGTGTTGTGTGTTTAGATTGTGTGTGGTGGTTGGGAGTGGGAGTTGTATTTTAGGGTGTGGGTTGGGAGAGTGAAAGTTGTGGGTGTTTTGGATGGTGGGTTAGGTGGTTGTGCC
+>001278_1608_2022 length=66 uaccno=E0R4ISW01D7HW4
+CACACACACTCTTCCCCATACCTACTCACACACACACACACACACACAAACATACACAAATAATTC
+>001333_1518_1176 length=100 uaccno=E0R4ISW01DZKTM
+AATTGTCGTTTGATTGTTGGAAAGTAGAGGGTCGGGTTGGGGTAGATTCGAAAGGGGAATTTTGAGAAAAGAAATGGAGGGAGGTAGGAAAATTTTTTGC
+>001398_1584_1549 length=112 uaccno=E0R4ISW01D5DPB
+TAATGAAATGGAATGGAATGGAATGGAATGAAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATGAAATGGAATGGAGTATAAAGGAATGGAATTAC
+>001455_1136_2179 length=50 uaccno=E0R4ISW01C12AD
+GGGTGTTGAATTTAATATGTAGTATATTGATTTGTGATGATTATTTTGCC
+>001481_1165_0549 length=50 uaccno=E0R4ISW01C4KON
+GGGTGTTGAATTTAATATGTAGTATATTGATTTGTGATGATTATTTTGCC
+>001744_1376_3512 length=101 uaccno=E0R4ISW01DM5T2
+TAAGTAGGGAAGGTTTGAGGTTGTTGGTGTTGGTAGTAGGGGTGTTTTAGTTAGGGGTTGTAGTTTGTTAAGGGAATTTTATTTGAGTTTAGAATTGAGGC
+>001893_1084_1137 length=120 uaccno=E0R4ISW01CXG4Z
+TGTATATTTTGTTGGGTTTGTATATATTGTTAGGTGTGGTTGGTGAGTTGTATTGGTGGTGGTGTAAGGTGAGTGGAAATGGGAATGGATTGTAGATATGTTGGATTTGTGGTTTTTGGT
+>001927_0254_0706 length=139 uaccno=E0R4ISW01AWLLG
+TGGAATCATCTAAGGGACACAAATAGAATCATCATTGAATGGAATCGAATGGAATCATCTAATGTACTCGAATGGAATTATTATTGAATAGAATAGAATGGAATTATCGAATGGAATCAAATGGAATGTAATGGAATGC
+>002007_1338_1037 length=95 uaccno=E0R4ISW01DJRTR
+GGGTTGTGTATTTGGATAGTATGTGGAAAATGGTATTAAAAAGAATTTGTAGTTGGATTGTTGGTGGTTATTTAGTTTTTGGGTAATGGGTAGAT
+>002186_1130_0654 length=50 uaccno=E0R4ISW01C1H5C
+GGGTGTTGAATTTAATATGTAGTATATTGATTTGTGATGATTATTTTGCC
+>002282_1237_2702 length=92 uaccno=E0R4ISW01DAXWG
+AATTAGCCGGGCGTGATGGCGGGCGTTTGTAGTTTTAGTTATTCGGGAGGTTGAGGTAGGAGAATGGCGTGAATTCGGGAAGCGGAGTTTGC
+>002382_1259_0997 length=64 uaccno=E0R4ISW01DCT37
+TAAGGGTTGAAGCGAGGTAGGTAGTTTGTTTGTGGTTTTGTTTCGTATTTTTGTTTCGTATCCC
+>002477_0657_0655 length=131 uaccno=E0R4ISW01BVY8H
+TTTTTGGAAAGTTGGGTGGGTATAGTTTTGAGTAGTTAGAGGTATTATAATAGTATTAGGAAGTTGAATGTGAGGGTATAAGAGTTAATTTGATTTTTCGTTGATATGTTTGTTGTTTGAAGTTAGAGTGC
+>003149_1553_2333 length=128 uaccno=E0R4ISW01D2OBZ
+TATTTAGTTTTAGTTTGTTTAGGTGGTTATAGAATACGGAGTTTATGAAGTTGATTAGGAATATTATTAGTTGAATTAAGAATTGGGAAGAGAGGGGAACGGGAAGGGACGTGAGTGATTATTATTGC
+>003194_1475_2845 length=58 uaccno=E0R4ISW01DVT7J
+TATTTTGGGTTAAGTCGGGTTTAGTTGTTAGGGCGAGAAGTTAGTTGTTGACCCCTGC
+>003206_1315_0479 length=52 uaccno=E0R4ISW01DHQPD
+GGGTTGGATAATATGATGGTGTTGGGGAATATTTAGGTATGTGGTTTGTGGC
+>003271_0173_0314 length=82 uaccno=E0R4ISW01APHAK
+GTTTATTTGTTATTTATTTTTAGGTTTAGAAGAGTGTTTGGTATTTATTGAGGATTTAGTATTTGTTAGAAGGATTGGATTC
+>003443_1737_2250 length=21 uaccno=E0R4ISW01EITSS
+TGTAGGTTGTGTTGTAGGTTG
+>002633_1776_1582 length=40 uaccno=E0R4ISW01EL8JK
+CAGGGTGGATTGGGGAACACACAGTGTGGCCGCGTGATTC
+>002663_0725_3154 length=84 uaccno=E0R4ISW01B1Z2S
+GCGTTTTATATTATAATTTAATATTTTGGAGGTTGGGTGCGGTGGTTTACGTTTGTAGTTTAGTATTTGGGAGGTTAAGGTAGC
+>002761_1056_4055 length=72 uaccno=E0R4ISW01CU2V9
+AATTTTATTCGATTTATGTGATGATTTATTTATTTTATTTGAAGATGATTTTATTCGAGATTATTCGATGAT
+>002843_0289_2275 length=80 uaccno=E0R4ISW01AZPE9
+ATTGAAGAGGTTGGTAAGTTTTAAGTTGGTAGGTGGTTGGGGAGTGGTTGGAGAGGAGTTGTTGGGAGTTTGTGTCCTGC
+>002934_1762_2177 length=50 uaccno=E0R4ISW01EK0Q7
+GGGTGTTGAATTTAATATGTAGTATATTGATTTGTGATGATTATTTTGCC
+>003515_1711_1058 length=79 uaccno=E0R4ISW01EGIPG
+AATTGAATGGAATTATTATTGAATGGATTCGAATGGAATTATTATTGAATGGAATCATCGAGTGGAATCGAATGGAATC
+>003541_1276_1589 length=70 uaccno=E0R4ISW01DECAV
+TAGTTTAGGGTGGTAGTTTGGATAAGGTAGTTTTACGGTTTAGTAGTAGTAGGTTAAGTAGGAAAACTGC
+>003587_1522_1804 length=109 uaccno=E0R4ISW01DZXX6
+AATTTATGTAGTGGAAGTAGGATATAAAGAATAGGTTAATGGATTTTGAGATATTAAAAAGAGTAGGAAATTAGTTGAGAGGTTAAGTAGTAGTTTATTTTAGCCACCC
+>003592_0076_0430 length=92 uaccno=E0R4ISW01AGYTC
+AATTAGTTAGGCGTGGTGGCGGGTGTTTGTAGTTTTAGTTATTCGGGAGGTTGAGGTAGGAGAATGTTGTGAATTTAGGAGGTGGAGTTTGC
+>003957_0595_0965 length=130 uaccno=E0R4ISW01BQJIV
+TAATATTAGGTGTCAATTTGACTGGATCGAGGGATGTGTGTCGGTGAGAGTCTCACTAGAGGTTGATATTTGAGTCGTTAGACTGGGAGAGGAAGACCGAACTGTCAAGTGTATGGGCGCCATCCAATTC
+>003986_1127_2937 length=61 uaccno=E0R4ISW01C1AFF
+TAATGGAATGGAATTTTCGGAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATTAC
+>004012_1559_1491 length=72 uaccno=E0R4ISW01D26M9
+TAGTGGATATAAATGGAATGGATTGGAATGGAATGGATACGAATGGAATGGATTGGAGTGGAATGGATTGAC
+>004030_1508_2061 length=123 uaccno=E0R4ISW01DYPWF
+TACGTATATACGCGTACGCGTATACGTATATACGCGTATACGTATACGCGTACGTATATATACGCGTATACGTTTACGTACGTACGCGTATATACGTACGTATACACACACGCATATGCATAC
+>004038_1061_2047 length=109 uaccno=E0R4ISW01CVG5D
+AATTGATTCGAATGGAATGGATTGGAATGGAACGGATTTGAATGGAATGGATTGGAATGGAATGGATTGAATGGAATGGATTGGAGAGGATTGGATTTGAATGGAATTC
+>004105_1121_0391 length=92 uaccno=E0R4ISW01C0PH1
+AATTAGTTGGGCGTGGTGGCGAGTGTTTGTAATTTTAGTTATTTAGGAGGTTGAGGTAGGAGAATTATTTGAACCCGGTAGACGGAAGTTGC
+>004129_1618_3423 length=79 uaccno=E0R4ISW01D8ELT
+AATTGAATGGTATTGAAAGGTATTAATTTAGTGGAATGGAATGGAATGTATTGGAATGGAAAATAATGGAATGGAGTGC
+>004203_0451_0902 length=72 uaccno=E0R4ISW01BDWC4
+TAGTTGGTGTGTTGTAATCGAGACGTAGTTGGTTGGTACGGGTTAGGGTTTTGATTGGGTTGTTGTGTTTGC
+>004626_1937_0919 length=180 uaccno=E0R4ISW01E0CVD
+TAGAGTAGATAGTAGGGTTAGAGAAGGTAGGGTACGTTTAGTTTGTTAGTAAGGTTTAAGTTTTGGGTGGGAAAGGTTAGTGGCGGGAAGGGACGAAGGTGGTAATCGAGAGTAGATTTAGAGAAGTTTTTGAAGTGGGCGTTGGGAGTTTTCGAAGTATTGAGAGAGAGGAGCTTGTGC
+>004913_0641_2071 length=92 uaccno=E0R4ISW01BULRD
+AATTAGTCGAGCGTTGTGGTGGGTATTTGTAATTTTAGCTACTCTGAAGGCTGAGGCAGGAGAACTGCTTGAACCCGGGAGGCGGAGGTTGC
+>005063_0599_1983 length=84 uaccno=E0R4ISW01BQWX9
+ATGTGGTGAAGATTGGTTTTAGGTGTTTTAATGTGGATTTTCAGGGGTTTTAAAAGGGTTGGGAGAGTGAAATATATATAAGGC
+>005140_0759_3209 length=74 uaccno=E0R4ISW01B4ZKR
+TAGTATAGAGGGTTTGTGGTCGTGAGGGTGTTGATGGCGGGAGGGTTTTGATGGTAGGAGGGCCCGTGCTGTGC
+>005351_0883_3221 length=95 uaccno=E0R4ISW01CFVHJ
+TTAGGTGTTATAGTTGAGTGAGATGTTAGTGTTTAATGGTTTTATTTAGGTTGATGGGTTAATGAGGGGGTATTTGATAGTTTTGAAGATTTGAC
+>005380_1702_1187 length=160 uaccno=E0R4ISW01EFQC1
+GTTTTTCGAGTATATATTTAGTAGTACGCTCGACTTCTCTTATATAAAGGTTTTGGTTTTTATAGGTTTTTCCATTGTGTCTGCCTGGGGGAGGGCCCTTCTCCTTCAGGATACTGTAGCTTCTCTGCGTGATAAGCCAGCATTCACGGCTTTCAGGTGC
+>005568_1060_1943 length=20 uaccno=E0R4ISW01CVDWP
+ATAGCGTATTTCTCACCTGC
+>005740_1536_2697 length=116 uaccno=E0R4ISW01D06VV
+TAAAGAGGTGTTATTATTAGTTAGGAGAGGAGGTGGTTAGATAGTAGTGGGATTATAGGGGAATATAGAGTTGTTAGTTTAGGGATAAGGGATTGATCGATGGGTTAGGTCTCTGC
+>005753_1884_3877 length=53 uaccno=E0R4ISW01EVRNB
+AAACTGAGTTGTGATGTTTGCATTCAACTCACAGAGTTCAACATTCCTTTAAC
+>read_equals_adapter 1a
+
+>read_equals_start_of_adapter 1b
+
+>read_equals_end_of_adapter 1c
+
+>read_equals_middle_of_adapter 1d
+
+>read_ends_with_adapter 2a
+GCTACTCTGAAGGCTGAGGCAGGAGAACTGCTTGAACCCGGGAGGCG
+>read_ends_with_start_of_adapter 2b
+GCTACTCTGAAGGCTGAGGCAGGAGAACTGCTTGAACCCGGGAGGCG
+>read_contains_adapter_in_the_middle 3
+CGTAGTTGGTTGGTACG
+>read_starts_with_adapter 4a
+AAAGGTTTTGGTTTTTATAGGTTTTT
+>read_starts_with_end_of_adapter 4b
+AAAGGTTTTGGTTTTTATAGGTTTTT
diff --git a/tests/cut/anchored-back.fasta b/tests/cut/anchored-back.fasta
new file mode 100644
index 0000000..c65f89a
--- /dev/null
+++ b/tests/cut/anchored-back.fasta
@@ -0,0 +1,8 @@
+>read1
+sequence
+>read2
+sequenceBACKADAPTERblabla
+>read3
+sequenceBACKADA
+>read4
+sequence
diff --git a/tests/cut/anchored.fasta b/tests/cut/anchored.fasta
new file mode 100644
index 0000000..cca3279
--- /dev/null
+++ b/tests/cut/anchored.fasta
@@ -0,0 +1,8 @@
+>read1
+sequence
+>read2
+blablaFRONTADAPTsequence
+>read3
+NTADAPTsequence
+>read4
+sequence
diff --git a/tests/cut/anchored_no_indels.fasta b/tests/cut/anchored_no_indels.fasta
new file mode 100644
index 0000000..b189dd4
--- /dev/null
+++ b/tests/cut/anchored_no_indels.fasta
@@ -0,0 +1,12 @@
+>no_mismatch (adapter: TTAGACATAT)
+GAGGTCAG
+>one_mismatch
+GAGGTCAG
+>two_mismatches
+TAAGACGTATGAGGTCAG
+>insertion
+ATTAGACATATGAGGTCAG
+>deletion
+TAGACATATGAGGTCAG
+>mismatch_plus_wildcard
+TNAGACGTATGAGGTCAG
diff --git a/tests/cut/anchored_no_indels_wildcard.fasta b/tests/cut/anchored_no_indels_wildcard.fasta
new file mode 100644
index 0000000..245cd41
--- /dev/null
+++ b/tests/cut/anchored_no_indels_wildcard.fasta
@@ -0,0 +1,12 @@
+>no_mismatch (adapter: TTAGACATAT)
+GAGGTCAG
+>one_mismatch
+GAGGTCAG
+>two_mismatches
+TAAGACGTATGAGGTCAG
+>insertion
+ATTAGACATATGAGGTCAG
+>deletion
+TAGACATATGAGGTCAG
+>mismatch_plus_wildcard
+GAGGTCAG
diff --git a/tests/cut/anywhere_repeat.fastq b/tests/cut/anywhere_repeat.fastq
new file mode 100644
index 0000000..e5ae7f3
--- /dev/null
+++ b/tests/cut/anywhere_repeat.fastq
@@ -0,0 +1,28 @@
+ at prefix:1_13_1400/1
+CGTCCGAANTAGCTACCACCCTGATTAGACAAAT
++
+)3%)&&&&!.1&(6:<'67..*,:75)'77&&&5
+ at prefix:1_13_1500/1
+NNNNANNNNNNNNNNNNNNNNNNNNNNNNNNNNN
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
+ at prefix:1_13_1550/1
+NNNNANNNNNNNNNNNNNNNNNNNNNNNNNNNNN
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
+ at prefix:1_13_1600/1
+NNNNATGTCCCCTGCCACATTGCCCTAGTNNNNN
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
+ at prefix:1_13_1700/1
+NNNNATGTCCCCTGCCACATTGCCCTAGTTTATT
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
+ at prefix:1_13_1800/1
+GTTCATGTCCCCTGCCACATTGCCCTAGTTTATT
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
+ at prefix:1_13_1900/1
+ATGGCTGTCCCCTGCCACATTGCCCTAGTNNNNN
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
diff --git a/tests/cut/discard-untrimmed.fastq b/tests/cut/discard-untrimmed.fastq
new file mode 100644
index 0000000..5caed44
--- /dev/null
+++ b/tests/cut/discard-untrimmed.fastq
@@ -0,0 +1,4 @@
+ at prefix:1_13_1440/1
+CTNCCCTGCCACATTGCCCTAGTTAAAC
++
+57!7<';<6?5;;6:+:=)71>70<,=:
diff --git a/tests/cut/discard.fastq b/tests/cut/discard.fastq
new file mode 100644
index 0000000..d3668fd
--- /dev/null
+++ b/tests/cut/discard.fastq
@@ -0,0 +1,4 @@
+ at prefix:1_13_1440/1
+CAAGATCTNCCCTGCCACATTGCCCTAGTTAAAC
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
diff --git a/tests/cut/dos.fastq b/tests/cut/dos.fastq
new file mode 100644
index 0000000..a3437d1
--- /dev/null
+++ b/tests/cut/dos.fastq
@@ -0,0 +1,12 @@
+ at prefix:1_13_573/1
+CGTCCGAANTAGCTACCACCCTGA
++
+)3%)&&&&!.1&(6:<'67..*,:
+ at prefix:1_13_1259/1
+AGCCGCTANGACGGGTTGGCCC
++
+;<:&:A;A!9<<<,7:<=3=;:
+ at prefix:1_13_1440/1
+CAAGATCTNCCCTGCCACATTGCCCTAGTTAAAC
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
diff --git a/tests/cut/empty.fastq b/tests/cut/empty.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/tests/cut/example.fa b/tests/cut/example.fa
new file mode 100644
index 0000000..50ab75e
--- /dev/null
+++ b/tests/cut/example.fa
@@ -0,0 +1,18 @@
+>read1
+MYSEQUENCE
+>read2
+MYSEQUENCE
+>read3
+MYSEQUENCE
+>read4
+MYSEQUENCEADABTER
+>read5
+MYSEQUENCEADAPTR
+>read6
+MYSEQUENCEADAPPTER
+>read7
+MYSEQUENCE
+>read8
+MYSEQUENCE
+>read9
+SOMETHING
diff --git a/tests/cut/examplefront.fa b/tests/cut/examplefront.fa
new file mode 100644
index 0000000..b60e194
--- /dev/null
+++ b/tests/cut/examplefront.fa
@@ -0,0 +1,18 @@
+>read1
+
+>read2
+MYSEQUENCEADAP
+>read3
+SOMETHINGELSE
+>read4
+MYSEQUENCEADABTER
+>read5
+MYSEQUENCEADAPTR
+>read6
+MYSEQUENCEADAPPTER
+>read7
+MYSEQUENCE
+>read8
+MYSEQUENCE
+>read9
+MYSEQUENCE
diff --git a/tests/cut/illumina.fastq b/tests/cut/illumina.fastq
new file mode 100644
index 0000000..9e74b7d
--- /dev/null
+++ b/tests/cut/illumina.fastq
@@ -0,0 +1,400 @@
+ at SEQ:1:1101:9010:3891#0/1 adapter start: 51
+ATAACCGGAGTAGTTGAAATGGTAATAAGACGACCAATCTGACCAGCAAGG
++
+FFFFFEDBE at 79@@>@CBCBFDBDFDDDDD<@C>ADD at B;5:978 at CBDDF
+ at SEQ:1:1101:9240:3898#0/1
+CCAGCAAGGAAGCCAAGATGGGAAAGGTCATGCGGCATACGCTCGGCGCCAGTTTGAATATTAGACATAATTTATCCTCAAGTAAGGGGCCGAAGCCCCTG
++
+GHGHGHHHHGGGDHHGDCGFEEFHHGDFGEHHGFHHHHHGHEAFDHHGFHHEEFHGHFHHFHGEHFBHHFHHHH at GGGDGDFEEFC@=D?GBGFGF:FB6D
+ at SEQ:1:1101:9207:3899#0/1 adapter start: 64
+TTAACTTCTCAGTAACAGATACAAACTCATCACGAACGTCAGAAGCAGCCTTATGGCCGTCAAC
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHCFHHF
+ at SEQ:1:1101:9148:3908#0/1 adapter start: 28
+ACGACGCAATGGAGAAAGACGGAGAGCG
++
+HHHHHHHHHHHHGHHHHGHHHHHHHHHH
+ at SEQ:1:1101:9044:3916#0/1 adapter start: 78
+AACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTATGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGA
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHGHHHHHHHHHHHHFHEBFHFFEFHE
+ at SEQ:1:1101:9235:3923#0/1
+TTGATGCGGTTATCCATCTGCTTATGGAAGCCAAGCATTGGGGATTGAGAAAGAGTAGAAATGCCACAAGCCTCAATAGCAGGTTTAAGAGCCTCGATACG
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHBHHFHFHHHHHFHHCHHFFHHHHEHHFDHCEEHHHFHHFHFEHHHHHHHHHEHHGFHH<FGGFABGGG?
+ at SEQ:1:1101:9086:3930#0/1 adapter start: 46
+CCATCCAAAGGATAAACATCATAGGCAGTCGGGAGGGTAGTCGGAA
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH at HHEHHHFH
+ at SEQ:1:1101:9028:3936#0/1
+CTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGC
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHCHFHHFHGBEFFFEFEFHEHHHFEHHFEEC>CDCEEEFDFFHHHCFFEFE?EBFEB?3
+ at SEQ:1:1101:9185:3939#0/1
+CGTTGAGGCTTGCGTTTATGGTACGCTGGACTTTGTAGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCGTCATTGCTTATTATGTTCATC
++
+HHHHHHHHHHHHHHFHHEHHHDHHFGHHHCHHHHHDHHHHFECEGBD<DCFHBHBBEEEGCCCDB?C9DECCC3CD<@DA<@>@@?A?DAFF9F<@@08?<
+ at SEQ:1:1101:9140:3961#0/1 adapter start: 66
+CAGGAGAAACATACGAAGGCGCATAACGATACCACTGACCCTCAGCAATCTTAAACTTCTTAGACG
++
+HHHHHHHGHHHHHHHHHHHGHHHHHHHHHHHHHHHHFHHHHHHFGHHHHHHHHHHHHHHHHDHHFH
+ at SEQ:1:1101:9073:3961#0/1 adapter start: 49
+GTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTATCTTGC
++
+HHHHHHHHFHHHHHHGHHHHHHHHHEHHGHHGHHHHHHHHHHGEHHHHH
+ at SEQ:1:1101:9196:3971#0/1 adapter start: 18
+ACCAGAAGGCGGTTCCTG
++
+HHHHHHHHHFHHHHHHHH
+ at SEQ:1:1101:9053:3973#0/1
+TTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAGCCGCTTAAAGCTACCAGGTTTATTGCTGTTTGTTTCTATGTGGCTTAAAACGTTACCA
++
+A39>A################################################################################################
+ at SEQ:1:1101:9120:3979#0/1
+GGCGTTGACAGATGTATCCATCTGAATGCAATGAAGAAAACCACCATTACCAGCATTAACCGTCAAACTATCAAAATATAACGTTGACGATGTAGCTTTAG
++
+HHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFFGFFDHBHHHFGEHHHFGHHHEHHHGH
+ at SEQ:1:1101:9045:3988#0/1 adapter start: 91
+TAACCCTGAAACAAATGCTTAGGGATTTTATTGGTATCAGGGTTAATCGTGCCAAGAAAAGCGGCATGGTCAATATAACCAGCAGTGTTAA
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHFHHHHHHHHHHHFHHHHHHDHHHHHHHFHFFHHGHEHHGHHHGHGHHFH
+ at SEQ:1:1101:9418:3756#0/1
+TAATCGTGCCAAGAAAAGCGGCATGGTCAATATAACCAGTAGTGTTAACAGTCGGGAGAGGAGTGGCATTAACACCATCCTTCATGAACTTAATCCACTGT
++
+HHHHHHHHHHHHHHHHFHHHGHEHHHFHHHHFFEHHFHHHHGHHFHFHHHGHHHDHFHCHFCFBCFEFDEHHHHHG at GGGGHHGHFFEG=AB at C:EDEEEH
+ at SEQ:1:1101:9394:3759#0/1
+CCCTCGCTTTCCTGCTCCTGTTGAGGTTATTGCTGCCGTCATTGCTTATTATGTTCATCTCGGCAACATTCATACGGTCTGGCTTATCCGTGCAGAGACTG
++
+#####################################################################################################
+ at SEQ:1:1101:9365:3766#0/1
+AAGCACATCACCTTGAATGCCACCGGAGGCGGCTTTTTGACCGCCTCCAAACAATTTAGACATGGCGCCACCAGCAAGAGCAGAAGCAATACCGCCAGCAA
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFFHHHHFHHHHEHHFGHHHHFEHHHHFEHHFDFFAFHEFHFHDFFFFHHDH?DFABFDHADFDHHHFBF
+ at SEQ:1:1101:9436:3776#0/1
+GAAGGACGTCAATAGTCACACAGTCCTTGACGGTATAATAACCACCATCATGGCGACCATCCAAAGGATAAACATCATAGGCAGTCGGGAGGGGAGTCGGA
++
+HHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHFHGHHHHHHHGHHHHHHFDHHHHHHHHHHHHHFH?HHHHHFBHEH at GHHGD=EEEE88==%893A@@;
+ at SEQ:1:1101:9354:3801#0/1
+CCAGCAAGAGCAGAAGCAATACCGCCAGCAATAGCACCAAACATAAATCACCTCACTTAAGTGGCTGGAGACAAATAATCTCTTTAATAACCTGATTCAGC
++
+HHHHHHHHHGHHGHHEGHHEHFGFEHHGHGGHHHHHHHFHGHHFHHEFFFHEHHFHHHDHE5EDFCAC+C)4&27DDA?7HFHDHEFGFG,<@7>?>??<A
+ at SEQ:1:1101:9389:3804#0/1 adapter start: 28
+ATTAGAGCCAATACCATCAGCTTTACCG
++
+GGGGFDGGHFHHHFFFGBEFGGGGGEFE
+ at SEQ:1:1101:9477:3819#0/1 adapter start: 28
+ATAAAGGAAAGGATACTCGTGATTATCT
++
+HHHHHHHHHHHHHHHHGHHHHHHHHHHH
+ at SEQ:1:1101:9428:3823#0/1
+CGTCAGTAAGAACGTCAGTGTTTCCTGCGCGTACACGCAAGGTAAACGCGAACAATTCAGCGGCTTTAACCGGACGCTCGACGCCATTAATAATGTTTTCC
++
+HHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHFGHGHHHHHHHEHHHHFHHHHHFHHHFHH?FHEFFFDGFDAFDCFAFDBFGBFGFHHHHHHHHHFHFH;8
+ at SEQ:1:1101:9403:3824#0/1 adapter start: 70
+GCTCAGGAACAAAGAAACGCGGCACAGAATGTTTATAGGTCTGTTGAACACGACCAGAAAACTGGCCTAA
++
+HHHHHHHHHHHHHHHHHHHEHHHHHHHHHHHHHHHHGDHDHHHHHHHHHGHHHHGHEHGHHHHFFHHHHH
+ at SEQ:1:1101:9362:3824#0/1
+ACCATGAAACCAACATAAACATTATTGCCCGGCGTACGGGGAAGGACGTCAATAGTCACACAGTCCTTGACGGTATAATAACCACCATCATGGCGACCATC
++
+HHHHHHHGHHHHHHHHHHHHHHHGHHHHHFHHHHHHHHFHHFHHHFHHHHHHHHHFHEHHHFHBHFHHHFCEFDEHHHHGHHHHHHHHHEFFFHHFFFDAG
+ at SEQ:1:1101:9480:3842#0/1 adapter start: 54
+GTACGGATTGTTCAGTAACTTGACTCATGATTTCTTACCTATTAGTGGTTGAAC
++
+BDCCC at 5<<<@BBB7DDDDD<<<9>::@<5DDDDDCDCBEDCDDDDBDDDBAA1
+ at SEQ:1:1101:9286:3846#0/1
+TGATTAAACTCCTAAGCAGAAAACCTACCGCGCTTCGCTTGGTCAACCCCTCAGCGGCAAAAATTAAAATTTTTACCGCTTCGGCGTTATAACCTCACACT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHFHHDGCEGGHHHHFHHFHEHHFHEGHGHGF
+ at SEQ:1:1101:9403:3867#0/1 adapter start: 1
+G
++
+H
+ at SEQ:1:1101:9341:3873#0/1 adapter start: 88
+CCTAAGCAGAAAACCTACCGCGCTTCGCTTGGTCAACCCCTCAGCGGCAAAAATTAAAATTTTTACCGCTTCGGCGTTATAACCTCAC
++
+HHHHHHHGGFHGHHHHHGHHHHFGHGHHHHEHHHFHFHFHFHH?CEEEDFCEFCDFFHFEABEDF.ECDCDFEEEEEGGFADACDHHH
+ at SEQ:1:1101:9381:3881#0/1 adapter start: 41
+ACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAGC
++
+HHHHHHHHHHHHGHGHDHHHHHHHHFEHHHGGGGFFBGFFF
+ at SEQ:1:1101:9360:3884#0/1
+TAATACCTTTCTTTTTGGGGTAATTATACTCATCGCGAATATCCTTAAGAGGGCGTTCAGCAGCCAGCTTGCGGCAAAACTGCGTAACCGTCTTCTCGTTC
++
+HGDEHGHDGHFGFGHFDFFF7EEEEGGFGGEGHEGHHHHFFFEHHHFHEHFBFFF>?DEEBF=?CDB:DFBGFBBGDFFHF?FAFGGABFGGFAFE6EDDC
+ at SEQ:1:1101:9323:3894#0/1 adapter start: 100
+ATACCGATATTGCTGGCGACCCTGTTTTGTATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGTTG
++
+HHGHHHHHHHHHHHHHHHHHHHEHDHHHHHGEHHFFHHFFFHHHHHHHHFHDHHBHGHB?HHDFFF?EFEHFHBFGEGGFFFDFBHFHHHHHFHHEFFFCF
+ at SEQ:1:1101:9267:3900#0/1 adapter start: 89
+GTTTTGGATTTAACCGAAGATGATTTCGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHHHFHHHHEHHEHHHFHHHHHHHHHHFHFHECFFHABGGGIGHHHGGFFGF
+ at SEQ:1:1101:9416:3909#0/1
+TAAACGTGACGATGAGGGACATAAAAAGTAAAAATGTCTACAGTAGAGTCAATAGCAAGGCCACGACGCAATGGAGAAAGACGGAGAGCGCCAACGGCGTC
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHHHHHHHHEHHGHHFEFHEFHFFDHEFHFAFFFA?GDFGFE at FFFB?B7EEFEFE?DAA##
+ at SEQ:1:1101:9360:3917#0/1 adapter start: 68
+ATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGGATTTGAGAATCAA
++
+HHHHHHHHHHHHHHHHHHHFHHHHHHHHHHFHHHHHHHFHEFHHHEHHCFFEFEE9AFFBBDCDCAEE
+ at SEQ:1:1101:9337:3918#0/1 adapter start: 14
+CATCAGCACCAGCA
++
+FDEGGGCDBEFCDF
+ at SEQ:1:1101:9307:3927#0/1 adapter start: 15
+TCAGCGCCTTCCATG
++
+FFFFFFFFFFFFFDF
+ at SEQ:1:1101:9479:3929#0/1 adapter start: 9
+GACAAATTA
++
+HHHHHHHHH
+ at SEQ:1:1101:9277:3934#0/1 adapter start: 71
+CTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATGTTGACGGCCATAAGGCTGCTTC
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHEHFHHHHFHHHHHFHHEHFHHHFHHFDHHFHHE
+ at SEQ:1:1101:9442:3934#0/1
+AGACCCATAATGTCAATAGATGTGGTAGAAGTCGTCATTTGGCGAGAAAGCTCAGTCTCAGGAGGAAGCGGAGCAGTCCAAATGTTTTTGAGATGGCAGCA
++
+HHHHHHHHHGHHHHHFGHHBHHEHGFHHDHGDEGDHHHHHFHHHHHAHHH?FEEBEFDFBEBEEFEHFE7ECCDCG=FDFFDFFFHHHHFEEBEF;BEAEG
+ at SEQ:1:1101:9329:3935#0/1
+AGATGGATAACCGCATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATGTTGACGGCCATAAGGCT
++
+GFGGGEEGDHHHGGEHHHHHHGGFHHEAHHAGDEGEGGEDG at GGGHHGHHFGGH6@CADDHHBEEE at 8EBGEEFGGGHFHHHHGEGFGGEFBGEDDE?E7E
+ at SEQ:1:1101:9445:3956#0/1 adapter start: 81
+TGCAACAACTGAACGGACTGGAAACACTGGTCATAATCATGGTGGCGAATAAGTACGCGTTCTTGCAAATCACCAGAAGGC
++
+HHHHHHHHHGFHHHHHHHHHHHHHHGHHHHFHHHHHHHHHHHFGHHHFGHHHHFGHHFHEHHHHHHHHHHHHGBHHHHGFG
+ at SEQ:1:1101:9357:3957#0/1
+TTAATCGTGCCAAGAAAAGCGGCATGGTCAATATAACCAGTAGTGTTAACAGTCGGGAGAGGAGTGGCATTAACACCATCCTTCATGAACTTAATCCACTG
++
+HHHHHHGHHHHHHHHHHGHEHHHHHGHEHHHHHHHHHHHHHHGHEBGGFGFFFFFBH?HCEEED<FEEEFFHHDHHHHEHHHGFHHH:BHHHHFHEFFHFF
+ at SEQ:1:1101:9487:3957#0/1
+CAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACAACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATG
++
+HHHHHHHHHHHHHHHGEHHHGHHHHHHHEGFHGHHHGHHHHGGHHHHHGHHHHHHHHHHFHHB>EFHFHBHFHCFHHGGGHEGHEGHEF at GHHFHEDHH;H
+ at SEQ:1:1101:9309:3957#0/1 adapter start: 72
+GTCAGATATGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTC
++
+HHHHHHHHHHHHHHHHHHHHHHHHHGHFHHHFHHHHHHHHGHHHFHHHHHHHFHDHHHHHHFHCHHEAHHDG
+ at SEQ:1:1101:9425:3960#0/1
+CTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAGTGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGC
++
+8?8?C?BC at BD=ABB==BD?CADD=AD>C@@CCBBDD at B/143'3.>>@9BCBDDDC8@@;<A=<DDDDB?A:A;9:2-74,<82;9877CBCDD/B at 5;<
+ at SEQ:1:1101:9337:3969#0/1
+GAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAAATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAA
++
+DBFEFFDEEEBFFFFF8FF=D=DDDEEE=E>@???FB=DFB=>C=EEFFFFFEFFFFF:FEF at FEF<FFFFF?DFDD8DDBD=DBFEB at E6FECF@EB8E?
+ at SEQ:1:1101:9388:3971#0/1
+CTGAATCTCTTTAGTCGCAGTAGGCGGAAAACGAACAAGCGCAAGAGTAAACATAGTGCCATGCTCAGGAACAAAGAAACGCGGCACAGAATGTTTATAGG
++
+HHHHHHFHHHHHHHHHHHHHHHHHHHHFHHGHHHFHHHHHHGHHHHHEFHHHFHHFEHHFEHHFFHHHHECFDF?HHHHGEGGHHHFHHHFEGCFFFFF=E
+ at SEQ:1:1101:9414:3978#0/1 adapter start: 99
+TTATTGGTATCAGGGTTAATCGTGCCAAGAAAAGCGGCATGGTCAATATAACCAGTAGTGTTAACAGTCGGGAGAGGAGTGGCATTAACACCATCCTTCGC
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHHFFHHHHG at HFHDHGHDHHHHHHFGHHGHG
+ at SEQ:1:1101:9494:3983#0/1 adapter start: 72
+TAGCACCAAACATAAATCACCTCACTTAAGTGGCTGGAGACAAATAATCTCTTTAATAACCTGATTCAGCGA
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHBF?FBHHFEHB?HEFEHBGEDEEBEDEEFACAFE>
+ at SEQ:1:1101:9363:3989#0/1 adapter start: 95
+CCTCCAAGATTTGGAGGCATGAAAACATACAATTGGGAGGGTGTCAATCCTGACGGTTATTTCCTAGACAAATTAGAGCCAATACCATCAGCTTTGCCTAA
++
+HHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHGHHHHHHHG<GFGGGGFGHHHHHHEEEEHHDEFHHFHHHFHHDHEGHHHHBHHGCGF8ECEEFFEDBA=
+ at SEQ:1:1101:9436:3998#0/1 adapter start: 67
+TAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGAT
++
+HHHHHHHHHHHHHHHHGGDHHHHHHFHHFGHHHHHHDHHFFDFGEFFHDFCFFEBDFHFFFFEEDEB
+ at SEQ:1:1101:9621:3755#0/1
+AGCCAATACCATCAGCTTTACCGTCTTTCCAGAAATTGTTCCAAGTATCGGCAACAGCTTTATCAATACCATGAAAAATATCAACCACACCAGAAGCAGCA
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHFHHFF?FHHFHFHHHHEHHC at FEHFHFHBGGGFHHHHHHDHHFFHGFHA
+ at SEQ:1:1101:9738:3756#0/1 adapter start: 1
+T
++
+H
+ at SEQ:1:1101:9580:3761#0/1 adapter start: 49
+TATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGAT
++
+HHHHHHHHHGGHHHHHEHHHFHEHHGEGGHFGDFGHFHFGHHFFDH?EF
+ at SEQ:1:1101:9533:3764#0/1 adapter start: 20
+TCTGTTGAACACGACCAGAA
++
+FEFFFF at FFDFFEFFDDBDD
+ at SEQ:1:1101:9636:3775#0/1
+ATAAGGCCACGTATTTTGCAAGCTATTTAACTGGCGGCGATTGCGTACCCGACGACCAAAATTAGGGTCAACGCTACCTGTAGGAAGTGTCCGCATAAAGT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHFH6HHHHHHHHHHFFHFCHFDCHFE;DAD9BDDDDGFGDGDGGB<FDCDCDGF>GEEGB;5
+ at SEQ:1:1101:9554:3781#0/1
+CACGCTCTTTTAAAATGTCAACAAGAGAATCTCTACCATGAACAAAATGTGACTCATATCTAAACCAGTCCTTGACGAACGTGCCAAGCATATTAAGCCAC
++
+HHHHHHHHHHHHHGGHHHHHHGHFHHHHHHEHHFHHHEHHHHHHHEHHGHHHHEHHHGFHHHEHHHHHHEEFFEDFEDFF>ACBAHGHHHHECEGHBCFEE
+ at SEQ:1:1101:9695:3783#0/1 adapter start: 52
+AATAACCCTGAAACAAATGCTTAGGGATTTTATTGGTATCAGGGTTAATCGT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHGHHHHHHHHHHF
+ at SEQ:1:1101:9572:3788#0/1
+ACCAACACGGCGAGTACAACGGCCCAGCTCAGAAGCGAGAACCAGCTGCGCTTGGGTGGGGCGATGGTGATGGTTTGCATGTTTGGCTCCGGTCTGTAGGC
++
+FFFFFFFFF=EBEB0A at A@<BD:EEFFA at EEEDE?EDE8<E?EE=E:BBB>>A?;FED;;<7??A>>9A>?DA1ADD?D:FF:BC;@##############
+ at SEQ:1:1101:9601:3793#0/1
+GCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGCCTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGAC
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHEHEGHFHHHHHHHHFHFHCHHHFHFFHHHHHH at HHHHHHGHHHFHHGFHHCFHEGGGFEGE?GCDAD6AD
+ at SEQ:1:1101:9634:3800#0/1
+TTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTCGTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGG
++
+HHGHFHFHHHHCGHHFHHHHHHGEHHHHHGFBEFHHFEHDHHHGFHHEHHFF9ECD?CEEHED<HBDEEBFEDEEE<FDFDGFBEHHEHCE>F?GEEDEEG
+ at SEQ:1:1101:9501:3800#0/1 adapter start: 42
+TGACCACCTACATACCAAAGACGAGCGCCTTTACGCTTGCCT
++
+HHHHHHHHHHHHHHHHFHHHHHHHHFHHHHHHHHHHHHHHHH
+ at SEQ:1:1101:9703:3807#0/1 adapter start: 27
+TAATAACCTGATTCAGCGAAACCAATC
++
+HHHHHHHHHHHHHHHHHHHHHHGHHHG
+ at SEQ:1:1101:9728:3808#0/1 adapter start: 7
+CAGAAAA
++
+HHHFHHH
+ at SEQ:1:1101:9676:3812#0/1 adapter start: 1
+T
++
+H
+ at SEQ:1:1101:9620:3815#0/1
+TCGCGTAGAGGCTTTGCTATTCAGCGTTTGATGAATGCAATGCGACAGGCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGA
++
+HHHHHHHHHHGGHHHGHHGHHHHHHHHHHGFHGHHHHHHHHHFHDHHHDDHFHFHFHHHHFF9EFF>DG?FCBCDFFFEBFFE at DFEGGEEG?GF>>:;@A
+ at SEQ:1:1101:9720:3834#0/1 adapter start: 74
+TAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAAGGATGGT
++
+HGHHHHHHHHHHHHHHHGGHEGGFGHFGHFHHDGHGHGHHHHHHHHHHFHHHHHFHFHFFHEFHF=FFHFHHFF
+ at SEQ:1:1101:9635:3844#0/1 adapter start: 4
+GACC
++
+HHHH
+ at SEQ:1:1101:9744:3849#0/1 adapter start: 55
+AAACATAGTGCCATGCTCAGGAACAAAGAAACGCGGCACAGAATGTTTATAGGTC
++
+HHHHHHHGCHHFHHFHHFFHEHFGCHHGDGHEFFHFHEHHGBBGFCDGFEEFDCF
+ at SEQ:1:1101:9725:3850#0/1
+ATAACCCTGAAACAAATGCTTAGGGATTTTATTGGTATCAGGGTTAATCGTGCCAAGAAAAGCGGCATGGTCAATATAACCAGTAGTGTTAACAGTCGGGA
++
+FDGGGDGGGEGGGGGBGBEGFFFDFFFFGGFGGGGFBGGGGGEFDFFGEGFFEFEDGGEEF9DCF?EFBBEDBBGFGGEGGGGCFGFEB at B7C>CDEEE##
+ at SEQ:1:1101:9544:3854#0/1
+TAGCGGTAAAGTTAGACCAAACCATGAAACCAACATAAACATTATTGCCCGGCGTACGGGGAAGGACGTCAATAGTCACACAGTCCTTGACGGTATAATAA
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHFFHHHHHHHHHBFHHHHHFHHHHHHHHHHHHHHFCHHHBHE
+ at SEQ:1:1101:9581:3856#0/1
+GGGCGGTGGTCTATAGTGTTATTAATATCAAGTTGGGGGAGCACATTGTAGCATTGTGCCAATTCATCCATTAACTTCTCAGTAACAGATACAAACTCATC
++
+HHHHHHEHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHFHHHHGHHHHHHHHHHHHHHHGGHHHFHHHHHGHFGHGEGHHHHHHFEHFHGDGGFFGHH at DH
+ at SEQ:1:1101:9649:3858#0/1 adapter start: 33
+CCTCCAAACAATTTAGACATGGCGCCACCAGCA
++
+B<B at A@AAB>FEEEE@@BA at 3>8<>CCDDBEE@
+ at SEQ:1:1101:9616:3862#0/1 adapter start: 91
+GAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAAATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGC
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHEHHHHHHHHHHHHHHFHHHHHHHFFFHFDHHEHHHGHHHHGDEHHGHHEGH
+ at SEQ:1:1101:9696:3866#0/1
+CAAGTTGCCATACAAAACAGGGTCGCCAGCAATATCGGTATAAGTCAAAGCACCTTTAGCGTTAAGGTACTGAATCTCTTTAGTCGCAGTAGGCGGAAAAC
++
+HHHHHHHHHHHHHHHHHHHHEHEHHHEHHHHFHHHHHHFHHHFHFHHHHHHHHFHHHHFHHFEHBHFEHHHHCEEHHFHHHHHHHHHHHHEHHHHCAFEFG
+ at SEQ:1:1101:9512:3869#0/1
+GCTCGACGCCATTAATAATGTTTTCCGTAAATTCAGCGCCTTCCATGATGAGACAGGCCGTTTGAATGTTGACGGGATGAACATAATAAGCAATGACGGCA
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHFHHHDHHHEHHFFFFFFHFAFEFH?E at FFGGGFGHFHAEFGFFFCEEFF
+ at SEQ:1:1101:9723:3870#0/1 adapter start: 66
+CTTTAGCAGCAAGGTATATATCTGACTTTTTGTTAACGTATTTAGCCACATAGCAACCAACAGACA
++
+##################################################################
+ at SEQ:1:1101:9667:3874#0/1
+CTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGGATTTGAGAATCAAAAAGAGCTTACT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHAHHHHEHHD=DAD>D6ADGE at EBE;@?BCGGE?4>ADAAC
+ at SEQ:1:1101:9565:3879#0/1 adapter start: 24
+AGCCTTATGGCCGTCAACATACAT
++
+HHHHHHHHHHHHHHHHHFHHGFFH
+ at SEQ:1:1101:9721:3885#0/1 adapter start: 51
+TTCCTCAAACGCTTGTTCGGTGGATAAGTTATGGCATTAATCGATTTATTT
++
+>BC?:A?=<>::A=528882.53)5.77;407)*9@:AA8CAA########
+ at SEQ:1:1101:9707:3894#0/1 adapter start: 40
+AACACCATCCTTCATGAACTTAATCCACTGTTCACCATAA
++
+F at F8DEE@EEBCCCCFFEFDDC=DCCFFF=ADD=D>@AA@
+ at SEQ:1:1101:9560:3900#0/1 adapter start: 6
+AGAAGT
++
+GGGGGF
+ at SEQ:1:1101:9696:3913#0/1 adapter start: 2
+CC
++
+HH
+ at SEQ:1:1101:9574:3914#0/1 adapter start: 5
+GAACA
++
+HHHHH
+ at SEQ:1:1101:9508:3931#0/1 adapter start: 91
+TAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCCCTCTTAAGGATATTCGCGATGAGTATAATTACCCCAA
++
+HGHHHHHHHHHHHHHHHHHHGHHHHHFHHHGHHHHFHHHHHHHHHD?ACFEF9FFEEBHBAEFB?E<F5CAD(DAEE;AE at C?D at BDGF?F
+ at SEQ:1:1101:9617:3935#0/1
+TAAATTTAATGTGACCGTTTATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAACGCCGAAGCGGT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHGHHHHHHHHHHHHHHHHHFHHHFFEDFEFHFHHFHFHGHHFHHHFHHEHHHFHHHHFB
+ at SEQ:1:1101:9667:3950#0/1 adapter start: 66
+CTTTAGCCATAGCACCAGAAACAAAACTAGGGACGGCCTCATCAGGGTTAGGAACATTAGAGCCTT
++
+HHHHHHHHHHHHHHHHGHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHEHHHHDGHFHHHHHHHBHH
+ at SEQ:1:1101:9705:3951#0/1 adapter start: 29
+ATTGCGTACCCGACGACCAAAATTAGGGT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at SEQ:1:1101:9527:3965#0/1
+AACATAGTGCCATGCTCAGGAACAAAGAAACGCGGCACAGAATGTTTATAGGTCTGTTGAACACGACCAGAAAACTGGCCTAACGACGTTTGGTCAGTTCC
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHFHHGHHFHEHHHEFEFF at HFHFGGGDGGHFGDFHFGHGHHFGHG
+ at SEQ:1:1101:9550:3969#0/1
+AGAGTAAACATAGTGCCATGCTCAGGAACAAAGAAACGCGGCACAGAATGTTTATAGGTCTGTTGAACACGACCAGAAAACTGGCCTAACGACGTTTGGTC
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHFHHHHHHHHHHHHFHHHFHHFEHHHHHHHHHHHHHGHHFHHHHFHHHHHHHHHHHG
+ at SEQ:1:1101:9636:3973#0/1 adapter start: 9
+CAAGCGCAA
++
+HHHHHHHHH
+ at SEQ:1:1101:9726:3981#0/1 adapter start: 66
+TTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTTTCTCGCCAAAT
++
+HHFEHHHHHHHHHHHHHHHHHHHHHHGHGHHGHGHHHHHHHHHHGGHHHEHHGFHHHHHEHHEHGH
+ at SEQ:1:1101:9603:3981#0/1 adapter start: 32
+TCTAAGAAGTTTAAGATTGCTGAGGGTCAGTG
++
+HHHHHHHHHHHHHHHFHHHHHHHHHHEHHHHH
+ at SEQ:1:1101:9533:3990#0/1 adapter start: 1
+G
++
+B
+ at SEQ:1:1101:9583:3992#0/1 adapter start: 20
+AAGGTACTGAATCTCTTTAG
++
+98583=>><>B at CBCD==BB
+ at SEQ:1:1101:9903:3754#0/1
+ACCAAAATTAGGGTCAACGCTACCTGTAGGAAGTGTCCGCATAAAGTGCACCGCATGGAAATGAAGACGGCCATCAGCTGTACCATACTCAGGCACACAAA
++
+GFEGGGGGBGE at EAEEGGFGGEGGFGEFFGFGFFGGEGGGGEFGCFCEFBF7FGEGEF?BFEEFDFFE??AADD+D at C@CGFCE6FDFFDFBGFDD at DAAD
+ at SEQ:1:1101:9878:3755#0/1 adapter start: 32
+AGAACGTGAAAAAGCGTCCTGCGTGTAGCGAA
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at SEQ:1:1101:9833:3756#0/1 adapter start: 65
+TCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAAGGATGGTGTTAATGCCACTCCTC
++
+HHHHHHHHHHHHHHHHHFHHHHHHHHHHHHHHHHFHHHHGHHFHHHHHEHEHHHHFHEHHHEHFH
+ at SEQ:1:1101:9991:3777#0/1
+GCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTGGATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGT
++
+HHHHHHHHHHHHHHHHHHHHHHGHHHGHHHHHHHGHHHHHHGHHHHHHHHHHHHHFHHFFDFFFCFFDHCFF;BFGEFGEGFGGFFF.CFDCCEDB=CBC@
diff --git a/tests/cut/illumina.info.txt b/tests/cut/illumina.info.txt
new file mode 100644
index 0000000..f8ad1fc
--- /dev/null
+++ b/tests/cut/illumina.info.txt
@@ -0,0 +1,100 @@
+SEQ:1:1101:9010:3891#0/1 adapter start: 51	1	51	81	ATAACCGGAGTAGTTGAAATGGTAATAAGACGACCAATCTGACCAGCAAGG	GCCTAACTTCTTAGACTGCCTTAAGGACGT	AAGCCAAGATGGGAAAGGTC	adapt	FFFFFEDBE at 79@@>@CBCBFDBDFDDDDD<@C>ADD at B;5:978 at CBDDF	FDB4B?DB21;84?DDBC9DEBAB;=@<@@	B@@@@B>CCBBDE98>>0 at 7
+SEQ:1:1101:9240:3898#0/1	-1	CCAGCAAGGAAGCCAAGATGGGAAAGGTCATGCGGCATACGCTCGGCGCCAGTTTGAATATTAGACATAATTTATCCTCAAGTAAGGGGCCGAAGCCCCTG	GHGHGHHHHGGGDHHGDCGFEEFHHGDFGEHHGFHHHHHGHEAFDHHGFHHEEFHGHFHHFHGEHFBHHFHHHH at GGGDGDFEEFC@=D?GBGFGF:FB6D
+SEQ:1:1101:9207:3899#0/1 adapter start: 64	1	64	94	TTAACTTCTCAGTAACAGATACAAACTCATCACGAACGTCAGAAGCAGCCTTATGGCCGTCAAC	GCCTAACTTCTTAGACTGCCTTAAGGACGT	ATACATA	adapt	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHCFHHF	HHFHFFFFFBHHGHHHFFHHFHGGHHDEBF	G<FGGDG
+SEQ:1:1101:9148:3908#0/1 adapter start: 28	1	28	58	ACGACGCAATGGAGAAAGACGGAGAGCG	GCCTAACTTCTTAGACTGCCTTAAGGACGT	CCAACGGCGTCCATCTCGAAGGAGTCGCCAGCGATAACCGGAG	adapt	HHHHHHHHHHHHGHHHHGHHHHHHHHHH	HHHHHHHHHHHHHHHHHGHHHHDHDHHFHH	HHHFFFFFHHHEFBEGEGGFFFHHHFHHHHHHFHHEHHGHEHD
+SEQ:1:1101:9044:3916#0/1 adapter start: 78	1	78	101	AACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTATGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGA	GCCTAACTTCTTAGACTGCCTTA		adapt	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHGHHHHHHHHHHHHFHEBFHFFEFHE	FHHGHFHHHHGGHGHHFHGGGHG	
+SEQ:1:1101:9235:3923#0/1	-1	TTGATGCGGTTATCCATCTGCTTATGGAAGCCAAGCATTGGGGATTGAGAAAGAGTAGAAATGCCACAAGCCTCAATAGCAGGTTTAAGAGCCTCGATACG	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHBHHFHFHHHHHFHHCHHFFHHHHEHHFDHCEEHHHFHHFHFEHHHHHHHHHEHHGFHH<FGGFABGGG?
+SEQ:1:1101:9086:3930#0/1 adapter start: 46	1	46	76	CCATCCAAAGGATAAACATCATAGGCAGTCGGGAGGGTAGTCGGAA	GCCTAACTTCTTAGACTGCCTTAAGGACGT	CCGAAGAAGACTCAAAGCGAACCAA	adapt	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH at HHEHHHFH	HHHFHHHHHFFHFFHHBHFFHFHHCFFHFH	HFHHHHEEHHGHHFEHFHGGEHEFH
+SEQ:1:1101:9028:3936#0/1	-1	CTTGATATTAATAACACTATAGACCACCGCCCCGAAGGGGACGAAAAATGGTTTTTAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGC	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHCHFHHFHGBEFFFEFEFHEHHHFEHHFEEC>CDCEEEFDFFHHHCFFEFE?EBFEB?3
+SEQ:1:1101:9185:3939#0/1	-1	CGTTGAGGCTTGCGTTTATGGTACGCTGGACTTTGTAGGATACCCTCGCTTTCCTGCTCCTGTTGAGTTTATTGCTGCCGTCATTGCTTATTATGTTCATC	HHHHHHHHHHHHHHFHHEHHHDHHFGHHHCHHHHHDHHHHFECEGBD<DCFHBHBBEEEGCCCDB?C9DECCC3CD<@DA<@>@@?A?DAFF9F<@@08?<
+SEQ:1:1101:9140:3961#0/1 adapter start: 66	1	66	96	CAGGAGAAACATACGAAGGCGCATAACGATACCACTGACCCTCAGCAATCTTAAACTTCTTAGACG	GCCTAACTTCTTAGACTGCCTTAAGGACGT	AATCA	adapt	HHHHHHHGHHHHHHHHHHHGHHHHHHHHHHHHHHHHFHHHHHHFGHHHHHHHHHHHHHHHHDHHFH	HHHEHHFHFHHHHHGHHHHHFHGHGHHHHH	EHCFG
+SEQ:1:1101:9073:3961#0/1 adapter start: 49	1	49	79	GTGGCAAGTCTGCCGCTGATAAAGGAAAGGATACTCGTGATTATCTTGC	GCCTAACTTCTTAGACTGCCTTAAGGACGT	TGCTGCATTTCCTGAGCTTAAT	adapt	HHHHHHHHFHHHHHHGHHHHHHHHHEHHGHHGHHHHHHHHHHGEHHHHH	GFHFFGHFHHGHHCHHFDGHHHHHFHHHFC	DFGHHHHHHCFGHHEGEFBGGB
+SEQ:1:1101:9196:3971#0/1 adapter start: 18	1	18	48	ACCAGAAGGCGGTTCCTG	GCCTAACTTCTTAGACTGCCTTAAGGACGT	AATGAATGGGAAGCCTTCAAGAAGGTGATAAGCAGGAGAAACATACGAAGGCG	adapt	HHHHHHHHHFHHHHHHHH	HGHHHGHHHHHHHFHHHHHHHHHHHEHHHH	HHHHHHHHFHHGHHHHHEHFHHHHBHEHHGEHFHFHHFHHHHFBDFHF?HHHH
+SEQ:1:1101:9053:3973#0/1	-1	TTCACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAGCCGCTTAAAGCTACCAGGTTTATTGCTGTTTGTTTCTATGTGGCTTAAAACGTTACCA	A39>A################################################################################################
+SEQ:1:1101:9120:3979#0/1	-1	GGCGTTGACAGATGTATCCATCTGAATGCAATGAAGAAAACCACCATTACCAGCATTAACCGTCAAACTATCAAAATATAACGTTGACGATGTAGCTTTAG	HHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFFGFFDHBHHHFGEHHHFGHHHEHHHGH
+SEQ:1:1101:9045:3988#0/1 adapter start: 91	1	91	101	TAACCCTGAAACAAATGCTTAGGGATTTTATTGGTATCAGGGTTAATCGTGCCAAGAAAAGCGGCATGGTCAATATAACCAGCAGTGTTAA	GCCTAACTTC		adapt	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHFHHHHHHHHHHHFHHHHHHDHHHHHHHFHFFHHGHEHHGHHHGHGHHFH	GHHFFFEFFE	
+SEQ:1:1101:9418:3756#0/1	-1	TAATCGTGCCAAGAAAAGCGGCATGGTCAATATAACCAGTAGTGTTAACAGTCGGGAGAGGAGTGGCATTAACACCATCCTTCATGAACTTAATCCACTGT	HHHHHHHHHHHHHHHHFHHHGHEHHHFHHHHFFEHHFHHHHGHHFHFHHHGHHHDHFHCHFCFBCFEFDEHHHHHG at GGGGHHGHFFEG=AB at C:EDEEEH
+SEQ:1:1101:9394:3759#0/1	-1	CCCTCGCTTTCCTGCTCCTGTTGAGGTTATTGCTGCCGTCATTGCTTATTATGTTCATCTCGGCAACATTCATACGGTCTGGCTTATCCGTGCAGAGACTG	#####################################################################################################
+SEQ:1:1101:9365:3766#0/1	-1	AAGCACATCACCTTGAATGCCACCGGAGGCGGCTTTTTGACCGCCTCCAAACAATTTAGACATGGCGCCACCAGCAAGAGCAGAAGCAATACCGCCAGCAA	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFFHHHHFHHHHEHHFGHHHHFEHHHHFEHHFDFFAFHEFHFHDFFFFHHDH?DFABFDHADFDHHHFBF
+SEQ:1:1101:9436:3776#0/1	-1	GAAGGACGTCAATAGTCACACAGTCCTTGACGGTATAATAACCACCATCATGGCGACCATCCAAAGGATAAACATCATAGGCAGTCGGGAGGGGAGTCGGA	HHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHFHGHHHHHHHGHHHHHHFDHHHHHHHHHHHHHFH?HHHHHFBHEH at GHHGD=EEEE88==%893A@@;
+SEQ:1:1101:9354:3801#0/1	-1	CCAGCAAGAGCAGAAGCAATACCGCCAGCAATAGCACCAAACATAAATCACCTCACTTAAGTGGCTGGAGACAAATAATCTCTTTAATAACCTGATTCAGC	HHHHHHHHHGHHGHHEGHHEHFGFEHHGHGGHHHHHHHFHGHHFHHEFFFHEHHFHHHDHE5EDFCAC+C)4&27DDA?7HFHDHEFGFG,<@7>?>??<A
+SEQ:1:1101:9389:3804#0/1 adapter start: 28	1	28	58	ATTAGAGCCAATACCATCAGCTTTACCG	GCCTAACTTCTTAGACTGCCTTAAGGACGT	TCTTTCCAGAAATTGTTCCAAGTATCGGCAACAGCTTTATCAA	adapt	GGGGFDGGHFHHHFFFGBEFGGGGGEFE	EFFGHFHHFHFDEFFEFHHHBFEFDD=BDD	DFHBE>EDC at FDDDDCDFE?DEEFGF<EE?F?GGEF>CC@;@D
+SEQ:1:1101:9477:3819#0/1 adapter start: 28	1	28	58	ATAAAGGAAAGGATACTCGTGATTATCT	GCCTAACTTCTTAGACTGCCTTAAGGACGT	TGCTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGT	adapt	HHHHHHHHHHHHHHHHGHHHHHHHHHHH	HHHHHHHHHHHHHFHHFHFHHHHHHHEHHH	HHEHHHHHHEHHDHDHBHHGCEHHHHHGGEFGG=DGDGCGC68
+SEQ:1:1101:9428:3823#0/1	-1	CGTCAGTAAGAACGTCAGTGTTTCCTGCGCGTACACGCAAGGTAAACGCGAACAATTCAGCGGCTTTAACCGGACGCTCGACGCCATTAATAATGTTTTCC	HHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHFGHGHHHHHHHEHHHHFHHHHHFHHHFHH?FHEFFFDGFDAFDCFAFDBFGBFGFHHHHHHHHHFHFH;8
+SEQ:1:1101:9403:3824#0/1 adapter start: 70	1	70	100	GCTCAGGAACAAAGAAACGCGGCACAGAATGTTTATAGGTCTGTTGAACACGACCAGAAAACTGGCCTAA	GCCTAACTTCTTAGACTGCCTTAAGGACGT	C	adapt	HHHHHHHHHHHHHHHHHHHEHHHHHHHHHHHHHHHHGDHDHHHHHHHHHGHHHHGHEHGHHHHFFHHHHH	EHFHFEHHFGBFFFDHCEHHHHGH=HHH=G	E
+SEQ:1:1101:9362:3824#0/1	-1	ACCATGAAACCAACATAAACATTATTGCCCGGCGTACGGGGAAGGACGTCAATAGTCACACAGTCCTTGACGGTATAATAACCACCATCATGGCGACCATC	HHHHHHHGHHHHHHHHHHHHHHHGHHHHHFHHHHHHHHFHHFHHHFHHHHHHHHHFHEHHHFHBHFHHHFCEFDEHHHHGHHHHHHHHHEFFFHHFFFDAG
+SEQ:1:1101:9480:3842#0/1 adapter start: 54	1	54	84	GTACGGATTGTTCAGTAACTTGACTCATGATTTCTTACCTATTAGTGGTTGAAC	GCCTAACTTCTTAGACTGCCTTAAGGACGT	CGCATCGGACTCAGATA	adapt	BDCCC at 5<<<@BBB7DDDDD<<<9>::@<5DDDDDCDCBEDCDDDDBDDDBAA1	/82638?D=CD2*><6BC<CC7;=;*CBCC	AC at 73C2=3<<@,CB at D
+SEQ:1:1101:9286:3846#0/1	-1	TGATTAAACTCCTAAGCAGAAAACCTACCGCGCTTCGCTTGGTCAACCCCTCAGCGGCAAAAATTAAAATTTTTACCGCTTCGGCGTTATAACCTCACACT	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHFHHDGCEGGHHHHFHHFHEHHFHEGHGHGF
+SEQ:1:1101:9403:3867#0/1 adapter start: 1	1	1	31	G	GCCTAACTTCTTAGACTGCCTTAAGGACGT	GAACAAAGAAACGCGGCACAGAATGTTTATAGGTCTGTTGAACACGACCAGAAAACTGGCCTAACGACGT	adapt	H	HHHHHHHHHHHHHHHHHHHHHHHHHHHHGH	HHHHHHHHHHHHHHHHHHHHHHHHHHHFHFHHHHHHHHHDFFBFHGGGFHHHHHHHHHHHHHHEBHHHFB
+SEQ:1:1101:9341:3873#0/1 adapter start: 88	1	88	101	CCTAAGCAGAAAACCTACCGCGCTTCGCTTGGTCAACCCCTCAGCGGCAAAAATTAAAATTTTTACCGCTTCGGCGTTATAACCTCAC	GCCTAACTTCTTA		adapt	HHHHHHHGGFHGHHHHHGHHHHFGHGHHHHEHHHFHFHFHFHH?CEEEDFCEFCDFFHFEABEDF.ECDCDFEEEEEGGFADACDHHH	BAFG3FF:BBE##	
+SEQ:1:1101:9381:3881#0/1 adapter start: 41	1	41	71	ACGTTCTGGTTGGTTGTGGCCTGTTGATGCTAAAGGTGAGC	GCCTAACTTCTTAGACTGCCTTAAGGACGT	CGCTTAAAGCTACCAGTTATATGGCTGTTG	adapt	HHHHHHHHHHHHGHGHDHHHHHHHHFEHHHGGGGFFBGFFF	HFEHEHHEF>FGFF?E?FEFFHBBFEE3E,	;/97-0(6,?=BB at A@D9D###########
+SEQ:1:1101:9360:3884#0/1	-1	TAATACCTTTCTTTTTGGGGTAATTATACTCATCGCGAATATCCTTAAGAGGGCGTTCAGCAGCCAGCTTGCGGCAAAACTGCGTAACCGTCTTCTCGTTC	HGDEHGHDGHFGFGHFDFFF7EEEEGGFGGEGHEGHHHHFFFEHHHFHEHFBFFF>?DEEBF=?CDB:DFBGFBBGDFFHF?FAFGGABFGGFAFE6EDDC
+SEQ:1:1101:9323:3894#0/1 adapter start: 100	-1	ATACCGATATTGCTGGCGACCCTGTTTTGTATGGCAACTTGCCGCCGCGTGAAATTTCTATGAAGGATGTTTTCCGTTCTGGTGATTCGTCTAAGAAGTTG	HHGHHHHHHHHHHHHHHHHHHHEHDHHHHHGEHHFFHHFFFHHHHHHHHFHDHHBHGHB?HHDFFF?EFEHFHBFGEGGFFFDFBHFHHHHHFHHEFFFCF
+SEQ:1:1101:9267:3900#0/1 adapter start: 89	1	89	101	GTTTTGGATTTAACCGAAGATGATTTCGATTTTCTGACGAGTAACAAAGTTTGGATTGCTACTGACCGCTCTCGTGCTCGTCGCTGCGT	GCCTAACTTCTT		adapt	HHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHHHFHHHHEHHEHHHFHHHHHHHHHHFHFHECFFHABGGGIGHHHGGFFGF	FCACFECEB5<;	
+SEQ:1:1101:9416:3909#0/1	-1	TAAACGTGACGATGAGGGACATAAAAAGTAAAAATGTCTACAGTAGAGTCAATAGCAAGGCCACGACGCAATGGAGAAAGACGGAGAGCGCCAACGGCGTC	HHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHHHHHHHHEHHGHHFEFHEFHFFDHEFHFAFFFA?GDFGFE at FFFB?B7EEFEFE?DAA##
+SEQ:1:1101:9360:3917#0/1 adapter start: 68	1	68	98	ATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGGATTTGAGAATCAA	GCCTAACTTCTTAGACTGCCTTAAGGACGT	AAA	adapt	HHHHHHHHHHHHHHHHHHHFHHHHHHHHHHFHHHHHHHFHEFHHHEHHCFFEFEE9AFFBBDCDCAEE	EFHD??<DFEEEEHHEEBFEGBDEHCHFE?	GE@
+SEQ:1:1101:9337:3918#0/1 adapter start: 14	1	14	44	CATCAGCACCAGCA	GCCTAACTTCTTAGACTGCCTTAAGGACGT	CGCTCCCAAGCATTAAGCTCAGGAAATGCAGCAGCAAGATAATCACGAGTATCCTTT	adapt	FDEGGGCDBEFCDF	FBGFFGGEGEDE=GGGEGGGEFFCCFGF7E	FFEGDEGCF;BFBEBFFCD5FEDCDA=95>>E4 at EC>74<-5@##############
+SEQ:1:1101:9307:3927#0/1 adapter start: 15	1	15	45	TCAGCGCCTTCCATG	GCCTAACTTCTTAGACTGCCTTAAGGACGT	ATGAGACAGGCCGTTTGAATGTTGACGGGATGAACATAATAAGCAATGACGGCAGC	adapt	FFFFFFFFFFFFFDF	=EEEEDFFFFBEEEEFFFFFFFFFFFDEEB	DFFFFDFFFFEF at FFFBEFFBFFEF--@@<FFBFFFF?FFEBEDEFEFFF######
+SEQ:1:1101:9479:3929#0/1 adapter start: 9	1	9	39	GACAAATTA	GCCTAACTTCTTAGACTGCCTTAAGGACGT	GAGCCAATACCATCAGCTTTACCGTCTTTCCAGAAATTGTTCCAAGTATCGGCAACAGCTTT	adapt	HHHHHHHHH	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH	HHHFHFFHHFHFHHFFFHHHHHHFHHAE?EEHFFCFGGGAGGEGFFHHHHHGFHH?GHGHEG
+SEQ:1:1101:9277:3934#0/1 adapter start: 71	1	71	101	CTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATGTTGACGGCCATAAGGCTGCTTC	GCCTAACTTCTTAGACTGCCTTAAGGACGT		adapt	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHEHFHHHHFHHHHHFHHEHFHHHFHHFDHHFHHE	FACFGEFFGEHDFFEHBHHDBEFEHFHHBC	
+SEQ:1:1101:9442:3934#0/1	-1	AGACCCATAATGTCAATAGATGTGGTAGAAGTCGTCATTTGGCGAGAAAGCTCAGTCTCAGGAGGAAGCGGAGCAGTCCAAATGTTTTTGAGATGGCAGCA	HHHHHHHHHGHHHHHFGHHBHHEHGFHHDHGDEGDHHHHHFHHHHHAHHH?FEEBEFDFBEBEEFEHFE7ECCDCG=FDFFDFFFHHHHFEEBEF;BEAEG
+SEQ:1:1101:9329:3935#0/1	-1	AGATGGATAACCGCATCAAGCTCTTGGAAGAGATTCTGTCTTTTCGTATGCAGGGCGTTGAGTTCGATAATGGTGATATGTATGTTGACGGCCATAAGGCT	GFGGGEEGDHHHGGEHHHHHHGGFHHEAHHAGDEGEGGEDG at GGGHHGHHFGGH6@CADDHHBEEE at 8EBGEEFGGGHFHHHHGEGFGGEFBGEDDE?E7E
+SEQ:1:1101:9445:3956#0/1 adapter start: 81	1	81	101	TGCAACAACTGAACGGACTGGAAACACTGGTCATAATCATGGTGGCGAATAAGTACGCGTTCTTGCAAATCACCAGAAGGC	GCCTAACTTCTTAGACTGCC		adapt	HHHHHHHHHGFHHHHHHHHHHHHHHGHHHHFHHHHHHHHHHHFGHHHFGHHHHFGHHFHEHHHHHHHHHHHHGBHHHHGFG	GGEGGGGFDHHHFHHGGEBE	
+SEQ:1:1101:9357:3957#0/1	-1	TTAATCGTGCCAAGAAAAGCGGCATGGTCAATATAACCAGTAGTGTTAACAGTCGGGAGAGGAGTGGCATTAACACCATCCTTCATGAACTTAATCCACTG	HHHHHHGHHHHHHHHHHGHEHHHHHGHEHHHHHHHHHHHHHHGHEBGGFGFFFFFBH?HCEEED<FEEEFFHHDHHHHEHHHGFHHH:BHHHHFHEFFHFF
+SEQ:1:1101:9487:3957#0/1	-1	CAATAAAATCCCTAAGCATTTGTTTCAGGGTTATTTGAATATCTATAACAACTATTTTAAAGCGCCGTGGATGCCTGACCGTACCGAGGCTAACCCTAATG	HHHHHHHHHHHHHHHGEHHHGHHHHHHHEGFHGHHHGHHHHGGHHHHHGHHHHHHHHHHFHHB>EFHFHBHFHCFHHGGGHEGHEGHEF at GHHFHEDHH;H
+SEQ:1:1101:9309:3957#0/1 adapter start: 72	1	72	101	GTCAGATATGGACCTTGCTGCTAAAGGTCTAGGAGCTAAAGAATGGAACAACTCACTAAAAACCAAGCTGTC	GCCTAACTTCTTAGACTGCCTTAAGGACG		adapt	HHHHHHHHHHHHHHHHHHHHHHHHHGHFHHHFHHHHHHHHGHHHFHHHHHHHFHDHHHHHHFHCHHEAHHDG	GHFHFHDHHHGHHEHHFFH?HHHFDGGG?	
+SEQ:1:1101:9425:3960#0/1	-1	CTGACGCAGAAGAAAACGTGCGTCAAAAATTACGTGCAGAAGGAGTGATGTAATGTCTAAAGGTAAAAAACGTTCTGGCGCTCGCCCTGGTCGTCCGCAGC	8?8?C?BC at BD=ABB==BD?CADD=AD>C@@CCBBDD at B/143'3.>>@9BCBDDDC8@@;<A=<DDDDB?A:A;9:2-74,<82;9877CBCDD/B at 5;<
+SEQ:1:1101:9337:3969#0/1	-1	GAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAAATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGCCATCAACTAA	DBFEFFDEEEBFFFFF8FF=D=DDDEEE=E>@???FB=DFB=>C=EEFFFFFEFFFFF:FEF at FEF<FFFFF?DFDD8DDBD=DBFEB at E6FECF@EB8E?
+SEQ:1:1101:9388:3971#0/1	-1	CTGAATCTCTTTAGTCGCAGTAGGCGGAAAACGAACAAGCGCAAGAGTAAACATAGTGCCATGCTCAGGAACAAAGAAACGCGGCACAGAATGTTTATAGG	HHHHHHFHHHHHHHHHHHHHHHHHHHHFHHGHHHFHHHHHHGHHHHHEFHHHFHHFEHHFEHHFFHHHHECFDF?HHHHGEGGHHHFHHHFEGCFFFFF=E
+SEQ:1:1101:9414:3978#0/1 adapter start: 99	-1	TTATTGGTATCAGGGTTAATCGTGCCAAGAAAAGCGGCATGGTCAATATAACCAGTAGTGTTAACAGTCGGGAGAGGAGTGGCATTAACACCATCCTTCGC	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHHFFHHHHG at HFHDHGHDHHHHHHFGHHGHG
+SEQ:1:1101:9494:3983#0/1 adapter start: 72	1	72	101	TAGCACCAAACATAAATCACCTCACTTAAGTGGCTGGAGACAAATAATCTCTTTAATAACCTGATTCAGCGA	GCCTAACTTCTTAGACTGCCTTAAGGACG		adapt	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHBF?FBHHFEHB?HEFEHBGEDEEBEDEEFACAFE>	EFBGGGFFGHFFHD5DGB=>>@;A>C5?A	
+SEQ:1:1101:9363:3989#0/1 adapter start: 95	-1	CCTCCAAGATTTGGAGGCATGAAAACATACAATTGGGAGGGTGTCAATCCTGACGGTTATTTCCTAGACAAATTAGAGCCAATACCATCAGCTTTGCCTAA	HHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHGHHHHHHHG<GFGGGGFGHHHHHHEEEEHHDEFHHFHHHFHHDHEGHHHHBHHGCGF8ECEEFFEDBA=
+SEQ:1:1101:9436:3998#0/1 adapter start: 67	1	67	97	TAAATTGTTTGGAGGCGGTCAAAAAGCCGCCTCCGGTGGCATTCAAGGTGATGTGCTTGCTACCGAT	GCCTAACTTCTTAGACTGCCTTAAGGACGT	AACA	adapt	HHHHHHHHHHHHHHHHGGDHHHHHHFHHFGHHHHHHDHHFFDFGEFFHDFCFFEBDFHFFFFEEDEB	EFFF9FEFGGFDBGEBBFGGBFBD6DDAF<	EEBE
+SEQ:1:1101:9621:3755#0/1	-1	AGCCAATACCATCAGCTTTACCGTCTTTCCAGAAATTGTTCCAAGTATCGGCAACAGCTTTATCAATACCATGAAAAATATCAACCACACCAGAAGCAGCA	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHFHHFF?FHHFHFHHHHEHHC at FEHFHFHBGGGFHHHHHHDHHFFHGFHA
+SEQ:1:1101:9738:3756#0/1 adapter start: 1	1	1	31	T	GCCTAACTTCTTAGACTGCCTTAAGGACGT	AAGTCAAAGCACCTTTAGCGTTAAGGTACTGAATCTCTTTAGTCGCAGTAGGCGGAAAACGAACAAGCGC	adapt	H	HHHHHHHHHHHHHHHHHHHHHHHFHHEHHH	HHHHHHHHHHHHFFGHHHBHHFHHHEHHHHHHFHHHHFHHHDDFGEFFDFFEFFEFHGBEGGDGHEGEFF
+SEQ:1:1101:9580:3761#0/1 adapter start: 49	1	49	79	TATTAAGCTCATTCAGGCTTCTGCCGTTTTGGATTTAACCGAAGATGAT	GCCTAACTTCTTAGACTGCCTTAAGGACGT	TTCGATTTTCTGACGAGTAACA	adapt	HHHHHHHHHGGHHHHHEHHHFHEHHGEGGHFGDFGHFHFGHHFFDH?EF	HHEFEHEFGGG4ADCDE=ECEC<:=?DD>B	B;FBFFEGGEGB==EGFHH<DB
+SEQ:1:1101:9533:3764#0/1 adapter start: 20	1	20	50	TCTGTTGAACACGACCAGAA	GCCTAACTTCTTAGACTGCCTTAAGGACGT	AACTGGCCTAACGACGTTTGGTCAGTTCCATCAACATCATAGCCAGATGCC	adapt	FEFFFF at FFDFFEFFDDBDD	B??<@FFFEEEEEEEFEEFFFCFFFFEBFF	FD at FBFBFFFE@BFFFFBF=ADD;@?@?AAFBEFFDA=FEFEFFB at -C?BE
+SEQ:1:1101:9636:3775#0/1	-1	ATAAGGCCACGTATTTTGCAAGCTATTTAACTGGCGGCGATTGCGTACCCGACGACCAAAATTAGGGTCAACGCTACCTGTAGGAAGTGTCCGCATAAAGT	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHFH6HHHHHHHHHHFFHFCHFDCHFE;DAD9BDDDDGFGDGDGGB<FDCDCDGF>GEEGB;5
+SEQ:1:1101:9554:3781#0/1	-1	CACGCTCTTTTAAAATGTCAACAAGAGAATCTCTACCATGAACAAAATGTGACTCATATCTAAACCAGTCCTTGACGAACGTGCCAAGCATATTAAGCCAC	HHHHHHHHHHHHHGGHHHHHHGHFHHHHHHEHHFHHHEHHHHHHHEHHGHHHHEHHHGFHHHEHHHHHHEEFFEDFEDFF>ACBAHGHHHHECEGHBCFEE
+SEQ:1:1101:9695:3783#0/1 adapter start: 52	1	52	82	AATAACCCTGAAACAAATGCTTAGGGATTTTATTGGTATCAGGGTTAATCGT	GCCTAACTTCTTAGACTGCCTTAAGGACGT	GCCAAGAAAAGCGGCATGG	adapt	HHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHGHHHHHHHHHHF	HHHHHHFHGEHEHHHHHGHHHHHHHHHFHH	FHGGHHHHHHGGHGFHHHG
+SEQ:1:1101:9572:3788#0/1	-1	ACCAACACGGCGAGTACAACGGCCCAGCTCAGAAGCGAGAACCAGCTGCGCTTGGGTGGGGCGATGGTGATGGTTTGCATGTTTGGCTCCGGTCTGTAGGC	FFFFFFFFF=EBEB0A at A@<BD:EEFFA at EEEDE?EDE8<E?EE=E:BBB>>A?;FED;;<7??A>>9A>?DA1ADD?D:FF:BC;@##############
+SEQ:1:1101:9601:3793#0/1	-1	GCCGCTAATCAGGTTGTTTCTGTTGGTGCTGATATTGCTTTTGATGCCGACCCTAAATTTTTTGCCTGTTTGGTTCGCTTTGAGTCTTCTTCGGTTCCGAC	HHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHEHEGHFHHHHHHHHFHFHCHHHFHFFHHHHHH at HHHHHHGHHHFHHGFHHCFHEGGGFEGE?GCDAD6AD
+SEQ:1:1101:9634:3800#0/1	-1	TTTATGCGGACACTTCCTACAGGTAGCGTTGACCCTAATTTTGGTCGTCGGGTACGCAATCGCCGCCAGTTAAATAGCTTGCAAAATACGTGGCCTTATGG	HHGHFHFHHHHCGHHFHHHHHHGEHHHHHGFBEFHHFEHDHHHGFHHEHHFF9ECD?CEEHED<HBDEEBFEDEEE<FDFDGFBEHHEHCE>F?GEEDEEG
+SEQ:1:1101:9501:3800#0/1 adapter start: 42	1	42	72	TGACCACCTACATACCAAAGACGAGCGCCTTTACGCTTGCCT	GCCTAACTTCTTAGACTGCCTTAAGGACGT	TTAGTACCTCGCAACGGCTGCGGACGACC	adapt	HHHHHHHHHHHHHHHHFHHHHHHHHFHHHHHHHHHHHHHHHH	HHHHHFHHHHHHHHHHHHHHFBHAEDBEFB	BEF=ADEEGGGEFCC>B1CCDCB7FGFFE
+SEQ:1:1101:9703:3807#0/1 adapter start: 27	1	27	57	TAATAACCTGATTCAGCGAAACCAATC	GCCTAACTTCTTAGACTGCCTTAAGGACGT	CGCGGCATTTAGTAGCGGTAAAGTTAGACCAAACCATGAAACCA	adapt	HHHHHHHHHHHHHHHHHHHHHHGHHHG	HHHFHGFHHHHFFHHHHHDHHHHBGFEFHH	HFHFHFDHFDFFFEHHGHDHHGHHEHHG at E?FDGBEBDGGFFGF
+SEQ:1:1101:9728:3808#0/1 adapter start: 7	1	7	37	CAGAAAA	GCCTAACTTCTTAGACTGCCTTAAGGACGT	CCTACCGCGCTTCGCTTGGTCAACCCCTCAGCGGCAAAAATTAAAATTTTTACCGCTTCGGCGT	adapt	HHHFHHH	HHHHHHHHHHHHHHHFHHHHHHHHHHHFB8	@B9C?CC at CHCFFFHF=FEED<4:?:>@,@;@>.>6;+?&@><CEC??A><:BC?DE@=7@###
+SEQ:1:1101:9676:3812#0/1 adapter start: 1	1	1	31	T	GCCTAACTTCTTAGACTGCCTTAAGGACGT	TATTGCCCGGCGTACGGGGAAGGACGTCAATAGTCACACAGTCCTTGACGGTATAATAACCACCATCATG	adapt	H	HHHHHHHHHHHHHHHHHHHHHHHFHHHHHH	HDHFHHHHHHHECHHEHEHHH=HHFHHFHFHHFHFHGFFEECFFHEFFGFGHFFEHHFHHFFFHF<F:D7
+SEQ:1:1101:9620:3815#0/1	-1	TCGCGTAGAGGCTTTGCTATTCAGCGTTTGATGAATGCAATGCGACAGGCTCATGCTGATGGTTGGTTTATCGTTTTTGACACTCTCACGTTGGCTGACGA	HHHHHHHHHHGGHHHGHHGHHHHHHHHHHGFHGHHHHHHHHHFHDHHHDDHFHFHFHHHHFF9EFF>DG?FCBCDFFFEBFFE at DFEGGEEG?GF>>:;@A
+SEQ:1:1101:9720:3834#0/1 adapter start: 74	1	74	101	TAGACATTTTTACTTTTTATGTCCCTCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAAGGATGGT	GCCTAACTTCTTAGACTGCCTTAAGGA		adapt	HGHHHHHHHHHHHHHHHGGHEGGFGHFGHFHHDGHGHGHHHHHHHHHHFHHHHHFHFHFFHEFHF=FFHFHHFF	HFGAGGHHDHGHBHHHEGDGC>FEC at D	
+SEQ:1:1101:9635:3844#0/1 adapter start: 4	1	4	34	GACC	GCCTAACTTCTTAGACTGCCTTAAGGACGT	ATCCAAAGGATAAACATCATAGGCAGTCGGGAGGGTAGTCGGAACCGAAGAAGACTCAAAGCGAACC	adapt	HHHH	GHHHHHHHHHGHHHHHHGHHGHHHGHGHHH	HFHHH;GGCGFH?HHFHEHHFFHFHFFFHHFDHHHHHHHHHEGHHHHGHGHEHHHHC@?GFEGBGHH
+SEQ:1:1101:9744:3849#0/1 adapter start: 55	1	55	85	AAACATAGTGCCATGCTCAGGAACAAAGAAACGCGGCACAGAATGTTTATAGGTC	GCCTAACTTCTTAGACTGCCTTAAGGACGT	TGTTGAACACGACCAG	adapt	HHHHHHHGCHHFHHFHHFFHEHFGCHHGDGHEFFHFHEHHGBBGFCDGFEEFDCF	FGEEEHEHFHHHCFF?EEFDEFD6FHGEHH	HEHHHBBE?:CCDA7G
+SEQ:1:1101:9725:3850#0/1	-1	ATAACCCTGAAACAAATGCTTAGGGATTTTATTGGTATCAGGGTTAATCGTGCCAAGAAAAGCGGCATGGTCAATATAACCAGTAGTGTTAACAGTCGGGA	FDGGGDGGGEGGGGGBGBEGFFFDFFFFGGFGGGGFBGGGGGEFDFFGEGFFEFEDGGEEF9DCF?EFBBEDBBGFGGEGGGGCFGFEB at B7C>CDEEE##
+SEQ:1:1101:9544:3854#0/1	-1	TAGCGGTAAAGTTAGACCAAACCATGAAACCAACATAAACATTATTGCCCGGCGTACGGGGAAGGACGTCAATAGTCACACAGTCCTTGACGGTATAATAA	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHHHHHHFFHHHHHHHHHBFHHHHHFHHHHHHHHHHHHHHFCHHHBHE
+SEQ:1:1101:9581:3856#0/1	-1	GGGCGGTGGTCTATAGTGTTATTAATATCAAGTTGGGGGAGCACATTGTAGCATTGTGCCAATTCATCCATTAACTTCTCAGTAACAGATACAAACTCATC	HHHHHHEHHHHHHHGHHHHHHHHHHHHHHHHHHHHHHHFHHHHGHHHHHHHHHHHHHHHGGHHHFHHHHHGHFGHGEGHHHHHHFEHFHGDGGFFGHH at DH
+SEQ:1:1101:9649:3858#0/1 adapter start: 33	1	33	63	CCTCCAAACAATTTAGACATGGCGCCACCAGCA	GCCTAACTTCTTAGACTGCCTTAAGGACGT	AGAGCAGAAGCAATACCGCCAGCAATAGCAACAAACAT	adapt	B<B at A@AAB>FEEEE@@BA at 3>8<>CCDDBEE@	DEFFDDFE=EEB at EDEEFDFDECEEBEB:C	-@<698<@BBA at DCBDDFCEBFCCD;DC=D at C######
+SEQ:1:1101:9616:3862#0/1 adapter start: 91	1	91	101	GAATTAAATCGAAGTGGACTGCTGGCGGAAAATGAGAAAATTCGACCTATCCTTGCGCAGCTCGAGAAGCTCTTACTTTGCGACCTTTCGC	GCCTAACTTC		adapt	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHHEHHHHHHHHHHHHHHFHHHHHHHFFFHFDHHEHHHGHHHHGDEHHGHHEGH	GCHHHHEHFG	
+SEQ:1:1101:9696:3866#0/1	-1	CAAGTTGCCATACAAAACAGGGTCGCCAGCAATATCGGTATAAGTCAAAGCACCTTTAGCGTTAAGGTACTGAATCTCTTTAGTCGCAGTAGGCGGAAAAC	HHHHHHHHHHHHHHHHHHHHEHEHHHEHHHHFHHHHHHFHHHFHFHHHHHHHHFHHHHFHHFEHBHFEHHHHCEEHHFHHHHHHHHHHHHEHHHHCAFEFG
+SEQ:1:1101:9512:3869#0/1	-1	GCTCGACGCCATTAATAATGTTTTCCGTAAATTCAGCGCCTTCCATGATGAGACAGGCCGTTTGAATGTTGACGGGATGAACATAATAAGCAATGACGGCA	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHFHHHDHHHEHHFFFFFFHFAFEFH?E at FFGGGFGHFHAEFGFFFCEEFF
+SEQ:1:1101:9723:3870#0/1 adapter start: 66	1	66	96	CTTTAGCAGCAAGGTATATATCTGACTTTTTGTTAACGTATTTAGCCACATAGCAACCAACAGACA	GCCTAACTTCTTAGACTGCCTTAAGGACGT	TATAA	adapt	##################################################################	##############################	#####
+SEQ:1:1101:9667:3874#0/1	-1	CTGCTGCATTTCCTGAGCTTAATGCTTGGGAGCGTGCTGGTGCTGATGCTTCCTCTGCTGGTATGGTTGACGCCGGATTTGAGAATCAAAAAGAGCTTACT	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHAHHHHEHHD=DAD>D6ADGE at EBE;@?BCGGE?4>ADAAC
+SEQ:1:1101:9565:3879#0/1 adapter start: 24	1	24	54	AGCCTTATGGCCGTCAACATACAT	GCCTAACTTCTTAGACTGCCTTAAGGACGT	ATCACCATTATCGAACTCAACGCCCTGCATACGAAAAGACAGAATCT	adapt	HHHHHHHHHHHHHHHHHFHHGFFH	HHHHHHHHHDGHHFHFHHHHHFECHFFHHH	HHEHFCFFFFHEHDEFHHCHHEG?GFEGGEGHHHHHH?HH?EFFFFF
+SEQ:1:1101:9721:3885#0/1 adapter start: 51	1	51	81	TTCCTCAAACGCTTGTTCGGTGGATAAGTTATGGCATTAATCGATTTATTT	GCCTAACTTCTTAGACTGCCTTAAGGACGT	ATCTCGCGGAAGAAAAACAC	adapt	>BC?:A?=<>::A=528882.53)5.77;407)*9@:AA8CAA########	##############################	####################
+SEQ:1:1101:9707:3894#0/1 adapter start: 40	1	40	70	AACACCATCCTTCATGAACTTAATCCACTGTTCACCATAA	GCCTAACTTCTTAGACTGCCTTAAGGACGT	ACGTGACGATGAGGGACATAAAAAGTAAAAA	adapt	F at F8DEE@EEBCCCCFFEFDDC=DCCFFF=ADD=D>@AA@	FFFDE99>,>>@=856>;6C<@1:39@>6@	=??:B<B at B@F at FFE<@;B@###########
+SEQ:1:1101:9560:3900#0/1 adapter start: 6	1	6	36	AGAAGT	GCCTAACTTCTTAGACTGCCTTAAGGACGT	GCCAGCCTGCAACGTACCTTCAAGAAGTCCTTTACCAGCTTTAGCCATAGCACCAGAAACAAAAC	adapt	GGGGGF	GGGGBGGGGFGGGFBEEDEEGFGACDDADF	EFFEDFGGEFECFFDFGFBDBGBFD?@.DCC5:;GFF>AEEEBEDFBF69:<8<B.DAC@;B@@E
+SEQ:1:1101:9696:3913#0/1 adapter start: 2	1	2	32	CC	GCCTAACTTCTTAGACTGCCTTAAGGACGT	ATGCTCAGGAACAAAGAAACGCGGCACAGAATGTTTATAGGTCTGTTGAACACGACCAGAAAACTGGCC	adapt	HH	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH	GHHHHGHHHHHHHHGHHFHHHHHHHHHHFHHHHHHHHHHHHFBGGGGFHHHHHHHHHEHHHHHHHHHEH
+SEQ:1:1101:9574:3914#0/1 adapter start: 5	1	5	35	GAACA	GCCTAACTTCTTAGACTGCCTTAAGGACGT	AGCGCAAGAGTAAACATAGTGCCATGCTCAGGAACAAAGAAACGCGGCACAGAATGTTTATAGGTC	adapt	HHHHH	HHHHHHHHHHFHHHHHHHHFHFHHHEHGHH	HHHHHHHHHHHHHEHHHHHHFGHFHEEFEGEHFCDEFFEFHFHGEHHHHHHHHHHHHFGHDHHFHD
+SEQ:1:1101:9508:3931#0/1 adapter start: 91	1	91	101	TAGAGAACGAGAAGACGGTTACGCAGTTTTGCCGCAAGCTGGCTGCTGAACGCCCTCTTAAGGATATTCGCGATGAGTATAATTACCCCAA	GCCTAACTTC		adapt	HGHHHHHHHHHHHHHHHHHHGHHHHHFHHHGHHHHFHHHHHHHHHD?ACFEF9FFEEBHBAEFB?E<F5CAD(DAEE;AE at C?D at BDGF?F	FFG;?DGDD:	
+SEQ:1:1101:9617:3935#0/1	-1	TAAATTTAATGTGACCGTTTATCGCAATCTGCCGACCACTCGCGATTCAATCATGACTTCGTGATAAAAGATTGAGTGTGAGGTTATAACGCCGAAGCGGT	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHGHHHHHHHHHHHHHHHHHFHHHFFEDFEFHFHHFHFHGHHFHHHFHHEHHHFHHHHFB
+SEQ:1:1101:9667:3950#0/1 adapter start: 66	1	66	96	CTTTAGCCATAGCACCAGAAACAAAACTAGGGACGGCCTCATCAGGGTTAGGAACATTAGAGCCTT	GCCTAACTTCTTAGACTGCCTTAAGGACGT	GAATG	adapt	HHHHHHHHHHHHHHHHGHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHEHHHHDGHFHHHHHHHBHH	HHHHHHHHHEEGDCGGBBFCFFE;GFBFFH	BDEH=
+SEQ:1:1101:9705:3951#0/1 adapter start: 29	1	29	59	ATTGCGTACCCGACGACCAAAATTAGGGT	GCCTAACTTCTTAGACTGCCTTAAGGACGT	CAACGCTACCTGTAGGAAGTGTCCGCATAAAGTGCACCGCAT	adapt	HHHHHHHHHHHHHHHHHHHHHHHHHHHHH	HHHHHHHFHHHHFHHHHFHEHHHHHHHFGH	FHEHHFHFHHHFHBFHHHHHHHHHHHFHEBFHFFFFCFCEF@
+SEQ:1:1101:9527:3965#0/1	-1	AACATAGTGCCATGCTCAGGAACAAAGAAACGCGGCACAGAATGTTTATAGGTCTGTTGAACACGACCAGAAAACTGGCCTAACGACGTTTGGTCAGTTCC	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHFHHGHHFHEHHHEFEFF at HFHFGGGDGGHFGDFHFGHGHHFGHG
+SEQ:1:1101:9550:3969#0/1	-1	AGAGTAAACATAGTGCCATGCTCAGGAACAAAGAAACGCGGCACAGAATGTTTATAGGTCTGTTGAACACGACCAGAAAACTGGCCTAACGACGTTTGGTC	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHFHHHHHHHHHHHHFHHHFHHFEHHHHHHHHHHHHHGHHFHHHHFHHHHHHHHHHHG
+SEQ:1:1101:9636:3973#0/1 adapter start: 9	1	9	39	CAAGCGCAA	GCCTAACTTCTTAGACTGCCTTAAGGACGT	GAGTAAACATAGTGCCATGCTCAGGAACAAAGAAACGCGGCACAGAATGTTTATAGGTCTGT	adapt	HHHHHHHHH	HHHHHHHHHHHHHHHHHHHHHHGHHHHHHH	HHHHGHHHHHHFHHHFHHHHHHHBHHHFHHGFHHFEGFHGGHHHHHHHHEHHHFFHHHEEHE
+SEQ:1:1101:9726:3981#0/1 adapter start: 66	1	66	96	TTGCTGCCATCTCAAAAACATTTGGACTGCTCCGCTTCCTCCTGAGACTGAGCTTTCTCGCCAAAT	GCCTAACTTCTTAGACTGCCTTAAGGACGT	GACGA	adapt	HHFEHHHHHHHHHHHHHHHHHHHHHHGHGHHGHGHHHHHHHHHHGGHHHEHHGFHHHHHEHHEHGH	GHGHHEHHBGGGG?GDFGGEGD=GEEGBGE	GFBEA
+SEQ:1:1101:9603:3981#0/1 adapter start: 32	1	32	62	TCTAAGAAGTTTAAGATTGCTGAGGGTCAGTG	GCCTAACTTCTTAGACTGCCTTAAGGACGT	GTATCGTTATGCGCCTTCGTATGTTTCTCCTGCTTATCA	adapt	HHHHHHHHHHHHHHHFHHHHHHHHHHEHHHHH	HEHHHHHHHFHHGHGHHHHHFHHEHHFGHH	HHEFHFHHHHHHFGHGHFHHFHHHEHEGFBDGGGB at F;G
+SEQ:1:1101:9533:3990#0/1 adapter start: 1	1	1	31	G	GCCTAACTTCTTAGACTGCCTTAAGGACGT	GGGAAGGACGTCAATAGTCACACAGTCCTTGACGGTATAATAACCACCATCATGGCGACCATCCAAAGGA	adapt	B	EFFEFF=FFEFDFFDFBF at D@DDDBBDD at B	CDDD::@=?BDCCAE@;BEEEE6>B5D>@DEDEEF?F<EFBBFFD8BCDDDCBCEECEEEE2??######
+SEQ:1:1101:9583:3992#0/1 adapter start: 20	1	20	50	AAGGTACTGAATCTCTTTAG	GCCTAACTTCTTAGACTGCCTTAAGGACGT	TCGCAGTAGGCGGAAAACGAACAAGCGCAAGAGTAAACATAGTGCCATGCT	adapt	98583=>><>B at CBCD==BB	DCDCCDD=8A>@<3A499:1@@@@CDC@@=	@=<C7:163><6@<@:=<?A0;333+01-97=<><?C@@@<99>>189<16
+SEQ:1:1101:9903:3754#0/1	-1	ACCAAAATTAGGGTCAACGCTACCTGTAGGAAGTGTCCGCATAAAGTGCACCGCATGGAAATGAAGACGGCCATCAGCTGTACCATACTCAGGCACACAAA	GFEGGGGGBGE at EAEEGGFGGEGGFGEFFGFGFFGGEGGGGEFGCFCEFBF7FGEGEF?BFEEFDFFE??AADD+D at C@CGFCE6FDFFDFBGFDD at DAAD
+SEQ:1:1101:9878:3755#0/1 adapter start: 32	1	32	62	AGAACGTGAAAAAGCGTCCTGCGTGTAGCGAA	GCCTAACTTCTTAGACTGCCTTAAGGACGT	CTGCGATGGGCATACTGTAACCATAAGGCCACGTATTTT	adapt	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHH	HFHHHBHHHHHHHHHFHFHEHHHHHHHHHH	HHHEFHFHHHDFHHHHFGHHHHHFCEHECHHF?D5D7 at D
+SEQ:1:1101:9833:3756#0/1 adapter start: 65	1	65	95	TCATCGTCACGTTTATGGTGAACAGTGGATTAAGTTCATGAAGGATGGTGTTAATGCCACTCCTC	GCCTAACTTCTTAGACTGCCTTAAGGACGT	TCCCGA	adapt	HHHHHHHHHHHHHHHHHFHHHHHHHHHHHHHHHHFHHHHGHHFHHHHHEHEHHHHFHEHHHEHFH	HHFHHFEFFB=;,01:99;;HHHHHHEFGE	EFFBFB
+SEQ:1:1101:9991:3777#0/1	-1	GCTTTGAGTCTTCTTCGGTTCCGACTACCCTCCCGACTGCCTATGATGTTTATCCTTTGGATGGTCGCCATGATGGTGGTTATTATACCGTCAAGGACTGT	HHHHHHHHHHHHHHHHHHHHHHGHHHGHHHHHHHGHHHHHHGHHHHHHHHHHHHHFHHFFDFFFCFFDHCFF;BFGEFGEGFGGFFF.CFDCCEDB=CBC@
diff --git a/tests/cut/illumina5.fastq b/tests/cut/illumina5.fastq
new file mode 100644
index 0000000..1b85887
--- /dev/null
+++ b/tests/cut/illumina5.fastq
@@ -0,0 +1,20 @@
+ at SEQ:1:1101:9010:3891#0/1 adapter start: 51
+ATAACCGGAGTAGTTGAAATGGTAATAAGACGACCAATCTGACCAGCAAGG
++
+FFFFFEDBE at 79@@>@CBCBFDBDFDDDDD<@C>ADD at B;5:978 at CBDDF
+ at SEQ:1:1101:9240:3898#0/1
+CCAGCAAGGAAGCCAAGATGGGAAAGGTCATGCGGCATACGCTCGGCGCCAGTTTGAATATTAGACATAATTTATCCTCAAGTAAGGGGCCGAAGCCCCTG
++
+GHGHGHHHHGGGDHHGDCGFEEFHHGDFGEHHGFHHHHHGHEAFDHHGFHHEEFHGHFHHFHGEHFBHHFHHHH at GGGDGDFEEFC@=D?GBGFGF:FB6D
+ at SEQ:1:1101:9207:3899#0/1 adapter start: 64
+TTAACTTCTCAGTAACAGATACAAACTCATCACGAACGTCAGAAGCAGCCTTATGGCCGTCAAC
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHCFHHF
+ at SEQ:1:1101:9148:3908#0/1 adapter start: 28
+ACGACGCAATGGAGAAAGACGGAGAGCG
++
+HHHHHHHHHHHHGHHHHGHHHHHHHHHH
+ at SEQ:1:1101:9044:3916#0/1 adapter start: 78
+AACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTATGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGA
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHGHHHHHHHHHHHHFHEBFHFFEFHE
diff --git a/tests/cut/illumina5.info.txt b/tests/cut/illumina5.info.txt
new file mode 100644
index 0000000..b5a6cec
--- /dev/null
+++ b/tests/cut/illumina5.info.txt
@@ -0,0 +1,8 @@
+SEQ:1:1101:9010:3891#0/1 adapter start: 51	0	64	81	ATAACCGGAGTAGTTGAAATGGTAATAAGACGACCAATCTGACCAGCAAGGGCCTAACTTCTTA	GACTGCCTTAAGGACGT	AAGCCAAGATGGGAAAGGTC	adapt2	FFFFFEDBE at 79@@>@CBCBFDBDFDDDDD<@C>ADD at B;5:978 at CBDDFFDB4B?DB21;84	?DDBC9DEBAB;=@<@@	B@@@@B>CCBBDE98>>0 at 7
+SEQ:1:1101:9010:3891#0/1 adapter start: 51	1	51	64	ATAACCGGAGTAGTTGAAATGGTAATAAGACGACCAATCTGACCAGCAAGG	GCCTAACTTCTTA		adapt	FFFFFEDBE at 79@@>@CBCBFDBDFDDDDD<@C>ADD at B;5:978 at CBDDF	FDB4B?DB21;84	
+SEQ:1:1101:9240:3898#0/1	-1	CCAGCAAGGAAGCCAAGATGGGAAAGGTCATGCGGCATACGCTCGGCGCCAGTTTGAATATTAGACATAATTTATCCTCAAGTAAGGGGCCGAAGCCCCTG	GHGHGHHHHGGGDHHGDCGFEEFHHGDFGEHHGFHHHHHGHEAFDHHGFHHEEFHGHFHHFHGEHFBHHFHHHH at GGGDGDFEEFC@=D?GBGFGF:FB6D
+SEQ:1:1101:9207:3899#0/1 adapter start: 64	0	77	94	TTAACTTCTCAGTAACAGATACAAACTCATCACGAACGTCAGAAGCAGCCTTATGGCCGTCAACGCCTAACTTCTTA	GACTGCCTTAAGGACGT	ATACATA	adapt2	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHCFHHFHHFHFFFFFBHHG	HHHFFHHFHGGHHDEBF	G<FGGDG
+SEQ:1:1101:9207:3899#0/1 adapter start: 64	1	64	77	TTAACTTCTCAGTAACAGATACAAACTCATCACGAACGTCAGAAGCAGCCTTATGGCCGTCAAC	GCCTAACTTCTTA		adapt	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHCFHHF	HHFHFFFFFBHHG	
+SEQ:1:1101:9148:3908#0/1 adapter start: 28	0	41	58	ACGACGCAATGGAGAAAGACGGAGAGCGGCCTAACTTCTTA	GACTGCCTTAAGGACGT	CCAACGGCGTCCATCTCGAAGGAGTCGCCAGCGATAACCGGAG	adapt2	HHHHHHHHHHHHGHHHHGHHHHHHHHHHHHHHHHHHHHHHH	HHHHGHHHHDHDHHFHH	HHHFFFFFHHHEFBEGEGGFFFHHHFHHHHHHFHHEHHGHEHD
+SEQ:1:1101:9148:3908#0/1 adapter start: 28	1	28	41	ACGACGCAATGGAGAAAGACGGAGAGCG	GCCTAACTTCTTA		adapt	HHHHHHHHHHHHGHHHHGHHHHHHHHHH	HHHHHHHHHHHHH	
+SEQ:1:1101:9044:3916#0/1 adapter start: 78	1	78	91	AACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTATGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGA	GCCTAACTTCTTA	GACTGCCTTA	adapt	HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHGHHHHHHHHHHHHFHEBFHFFEFHE	FHHGHFHHHHGGH	GHHFHGGGHG
diff --git a/tests/cut/illumina64.fastq b/tests/cut/illumina64.fastq
new file mode 100644
index 0000000..bbfce73
--- /dev/null
+++ b/tests/cut/illumina64.fastq
@@ -0,0 +1,80 @@
+ at 14569
+AAGTTTATTCCTGGACGAAGGAAGAAAAGGCCAGATGGGAAACAAGAACAAGCCCCTGTTGAAGACGCAGGGCC
++
+cceeeeceeeee`dedbdbdb_^b`abU_cacadabd`dLMZ[XTcT^a^adaaaddcd`aL^`^_`Y\]^`Y_
+ at 19211
+AGA
++
+^\`
+ at 9180
+GAGGG
++
+b`bLb
+ at 19132
+TGTGATTATCCACTGGTATAT
++
+Z[QZZLZ[]J[SHZNaZ[_Ia
+ at 15868
+CTGCCAAGGCTGCCCCCAAA
++
+`c`cc\`\Lb]bL`[`a]L`
+ at 1424
+GGCCCCAGACTTGCTCCCCCAACAAGGACAATGTCCAAGGAGTGTCCCC
++
+eeeeeeeea`bbdaaadad`Oaaaaccada_aa_d`_X`_^`[`_[_W^
+ at 7855
+GTGGGGGCT
++
+]^\]FW]Z`
+ at 17943
+ACATGGGACCAGAAAACACCACCAGGGGTTTGGGGCTGTCCTGAG
++
+ccc`\^`aba\b^`\FR`OOPYG[[W```[Ra_RR_\]\\P\_H_
+ at 11100
+CGGATAACTGAAAATGCATTTTTAACGCCATGACCGTGTCTCAAGGACCCGCTGTGGAAG
++
+b`b_b_a\bc^Tabadaddcddd``bdaa_^aJ\^_\]\\__O[___L^\_aaa^^^UJ^
+ at 15663
+AGGT
++
+aaKa
+ at 4698
+CCAATTGGCACCCCTCTGCCTTCAGCCATT
++
+cccc\`ccc\caccZccccc]^`LY\bL_b
+ at 20649
+TCTGGACTGGATCTTTAGGATGGTGGAGATGATCTGGATGTAGGACAAAAGAACCAGGCAGAAGGGTG
++
+eeeeeaddadacdddebeccdddadd\^abbT_]bccTac]]b]L^][]Ve[^ZaY_^_^`\\Y]^Y`
+ at 17259
+
++
+
+ at 6003
+CTTCAACTCATCTTGTTATTAATACCATCAATATCCCATGAGGCTCATAAAACGAGTCTTTCTTCTTGGAAACATGACCAAGATTGGGCAAACGT
++
+fffffffffffffffffdffecfcefeffdcfdeeebbbdbccccc\db\`^aa`^Y^^^cbcbaa`bbWY^^^__S_YYR]GWY]\]]XX\_`S
+ at 4118
+TCAAATTGTACTGCAAAGAAGGTCCCAGCTGGTCTCTTCTGGGAGTGATCTAACTAACTTAAG
++
+dc^ddeeeeeedeee`ceceddadadddcbde_dedc_ec_a^^b\b\\]VIPZY^T^^^\L_
+ at 18416
+GTGGGGAAGCCGAAGAAGCAGCGGAGATCGATTGTAAGAACGACG
++
+dddacaabdbea\d^cce\da`dd_^__`a`a`b[_^__^\^^^_
+ at 20115
+TGAAAAAGGAAAACATGGTAGTTTTCTTGTATGAGAGAGCCAGAGCCACCTTGGAGATTTTGTTCTCTCTGTGCG
++
+ed^eeafffaddfecdddabc^_badd`bd_ddadaa^bbcad\d\__^_\aaa_aY____aaN_\cdc\^aaYb
+ at 16139
+TCATCCGAAGAGTTGGCAGGCCCTGTGAATTGTGAAAACAGTATACCCACCCCTTTCCC
++
+cabacacY^c\daaddaadad^\ad_a\Y`[ZQ]Y^^OYQ^X^YT\\]U\^RRX^\YJ^
+ at 14123
+GATTTGGGGAAAGGAAACAATAGTTGAGTTTGGGCCACGGGAAATTCAAGATGCCTGGTATGTC
++
+cccccccac^bYbbT_aa_Yb^^Ta\\^]]aaTaaaaab\b\XL`VZZV]QYYY[aa^^^^_^^
+ at 8766
+ACCTGTAAGGTCCGCTCCTGGTGGACACCCACGAAGTCCAGGGCCTCAGGCAGGAAGTTGTAGCGCAGAGTTTTGAGCAGCTGCTCCATC
++
+fcfffffcffeffeeefdefddeecdccacddfdYd`d^\_^`\_abbc\b[ba^Y^Z_^^H^Z_^Y_Y_OKWPZR]]Z]`Z``Z^UHZ^
diff --git a/tests/cut/interleaved.fastq b/tests/cut/interleaved.fastq
new file mode 100644
index 0000000..081a90f
--- /dev/null
+++ b/tests/cut/interleaved.fastq
@@ -0,0 +1,16 @@
+ at read1/1 some text
+TTATTTGTCTCCAGC
++
+##HHHHHHHHHHHHH
+ at read1/2 other text
+GCTGGAGACAAATAA
++
+HHHHHHHHHHHHHHH
+ at read3/1
+CCAACTTGATATTAATAACA
++
+HHHHHHHHHHHHHHHHHHHH
+ at read3/2
+TGTTATTAATATCAAGTTGG
++
+#HHHHHHHHHHHHHHHHHHH
diff --git a/tests/cut/issue46.fasta b/tests/cut/issue46.fasta
new file mode 100644
index 0000000..0bc0403
--- /dev/null
+++ b/tests/cut/issue46.fasta
@@ -0,0 +1,2 @@
+>readname
+A
diff --git a/tests/cut/linked.fasta b/tests/cut/linked.fasta
new file mode 100644
index 0000000..c010e80
--- /dev/null
+++ b/tests/cut/linked.fasta
@@ -0,0 +1,10 @@
+>r1 5' adapter and 3' adapter
+CCCCCCCCCC
+>r5 only 5' adapter
+CCCCCCCCCCGGGGGGG
+>r3 5' adapter, partial 3' adapter
+CCCGGCCCCC
+>r4 only 3' adapter
+GGGGGGGGGGCCCCCCCCCCTTTTTTTTTTGGGGGGG
+>r2 without any adapter
+GGGGGGGGGGGGGGGGGGG
diff --git a/tests/cut/lowercase.fastq b/tests/cut/lowercase.fastq
new file mode 100644
index 0000000..a3437d1
--- /dev/null
+++ b/tests/cut/lowercase.fastq
@@ -0,0 +1,12 @@
+ at prefix:1_13_573/1
+CGTCCGAANTAGCTACCACCCTGA
++
+)3%)&&&&!.1&(6:<'67..*,:
+ at prefix:1_13_1259/1
+AGCCGCTANGACGGGTTGGCCC
++
+;<:&:A;A!9<<<,7:<=3=;:
+ at prefix:1_13_1440/1
+CAAGATCTNCCCTGCCACATTGCCCTAGTTAAAC
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
diff --git a/tests/cut/lowqual.fastq b/tests/cut/lowqual.fastq
new file mode 100644
index 0000000..58c5a65
--- /dev/null
+++ b/tests/cut/lowqual.fastq
@@ -0,0 +1,8 @@
+ at first_sequence
+
++
+
+ at second_sequence
+
++
+
diff --git a/tests/cut/maxlen.fa b/tests/cut/maxlen.fa
new file mode 100644
index 0000000..8b4729b
--- /dev/null
+++ b/tests/cut/maxlen.fa
@@ -0,0 +1,14 @@
+>read_length0a
+T
+>read_length0b
+T
+>read_length1
+T2
+>read_length2
+T02
+>read_length3
+T302
+>read_length4
+T3302
+>read_length5
+T23302
diff --git a/tests/cut/maxn0.2.fasta b/tests/cut/maxn0.2.fasta
new file mode 100644
index 0000000..0255fc7
--- /dev/null
+++ b/tests/cut/maxn0.2.fasta
@@ -0,0 +1,6 @@
+>r1
+
+>r3
+AAAA
+>r4
+AAAAN
diff --git a/tests/cut/maxn0.4.fasta b/tests/cut/maxn0.4.fasta
new file mode 100644
index 0000000..9c830e5
--- /dev/null
+++ b/tests/cut/maxn0.4.fasta
@@ -0,0 +1,8 @@
+>r1
+
+>r3
+AAAA
+>r4
+AAAAN
+>r5
+AAANN
diff --git a/tests/cut/maxn0.fasta b/tests/cut/maxn0.fasta
new file mode 100644
index 0000000..d448df2
--- /dev/null
+++ b/tests/cut/maxn0.fasta
@@ -0,0 +1,4 @@
+>r1
+
+>r3
+AAAA
diff --git a/tests/cut/maxn1.fasta b/tests/cut/maxn1.fasta
new file mode 100644
index 0000000..4edae80
--- /dev/null
+++ b/tests/cut/maxn1.fasta
@@ -0,0 +1,8 @@
+>r1
+
+>r2
+N
+>r3
+AAAA
+>r4
+AAAAN
diff --git a/tests/cut/maxn2.fasta b/tests/cut/maxn2.fasta
new file mode 100644
index 0000000..3eb7ba2
--- /dev/null
+++ b/tests/cut/maxn2.fasta
@@ -0,0 +1,10 @@
+>r1
+
+>r2
+N
+>r3
+AAAA
+>r4
+AAAAN
+>r5
+AAANN
diff --git a/tests/cut/minlen.fa b/tests/cut/minlen.fa
new file mode 100644
index 0000000..fa9b0fe
--- /dev/null
+++ b/tests/cut/minlen.fa
@@ -0,0 +1,16 @@
+>read_length5
+T23302
+>read_length6
+T023302
+>read_length7
+T1023302
+>read_length8
+T11023302
+>read_length9
+T111023302
+>read_length10
+T2111023302
+>read_length11
+T02111023302
+>read_length12
+T002111023302
diff --git a/tests/cut/minlen.noprimer.fa b/tests/cut/minlen.noprimer.fa
new file mode 100644
index 0000000..1befe6e
--- /dev/null
+++ b/tests/cut/minlen.noprimer.fa
@@ -0,0 +1,14 @@
+>read_length6
+23302
+>read_length7
+023302
+>read_length8
+1023302
+>read_length9
+11023302
+>read_length10
+111023302
+>read_length11
+2111023302
+>read_length12
+02111023302
diff --git a/tests/cut/nextseq.fastq b/tests/cut/nextseq.fastq
new file mode 100644
index 0000000..fad6929
--- /dev/null
+++ b/tests/cut/nextseq.fastq
@@ -0,0 +1,8 @@
+ at NS500350:251:HLM7JBGXX:1:11101:12075:1120 1:N:0:TACAGC
+GATCGGAAGAGCACACGTCTGAACTCCAGTCACTACAGCATCTCGTATTCCGTCTTCTGCTTGAAAAAAAA
++
+AAAAAEEEEEEAEEEEAEAEEEEEEAEEEEEEEEEEEEEEE///E/EE////AAEE/E//////EEEEEEE
+ at NS500350:251:HLM7JBGXX:1:11101:22452:1121 1:N:0:TACAGC
+GATCGGAAGAGCACACGTCTGAACTCCAGTCACTACAGCATCGCGTATGCCGTCTTATGCTTGAAAAAAAAA
++
+AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE/////E/EE//E6///E//A//E//EEEEEEEE
diff --git a/tests/cut/no-trim.fastq b/tests/cut/no-trim.fastq
new file mode 100644
index 0000000..d3668fd
--- /dev/null
+++ b/tests/cut/no-trim.fastq
@@ -0,0 +1,4 @@
+ at prefix:1_13_1440/1
+CAAGATCTNCCCTGCCACATTGCCCTAGTTAAAC
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
diff --git a/tests/cut/no_indels.fasta b/tests/cut/no_indels.fasta
new file mode 100644
index 0000000..7c56412
--- /dev/null
+++ b/tests/cut/no_indels.fasta
@@ -0,0 +1,18 @@
+>3p_orig
+TGAACATAGC
+>3p_mism
+TGAACATAGC
+>3p_del
+TGAACATAGCTTAACATATAACCG
+>3p_ins
+TGAACATAGCTTAGGACATATAACCG
+>3p_frontins
+TAGACATATAACCG
+>5p_orig
+TACTGCTTCTCGAA
+>5p_mism
+TACTGCTTCTCGAA
+>5p_del
+TCCTCGAGATGCCATACTGCTTCTCGAA
+>5p_ins
+TCCTCGAGATATGCCATACTGCTTCTCGAA
diff --git a/tests/cut/overlapa.fa b/tests/cut/overlapa.fa
new file mode 100644
index 0000000..a4ad60d
--- /dev/null
+++ b/tests/cut/overlapa.fa
@@ -0,0 +1,40 @@
+>read1
+T0021110233021
+>read2
+T0021110233021
+>read3
+T0021110233021
+>read4
+T0021110233021
+>read5
+T0021110233021
+>read6
+T0021110233021
+>read7
+T0021110233021
+>read8
+T0021110233021
+>read9
+T0021110233021
+>read10
+T0021110233021330201030
+>read11
+T002111023302133020103
+>read12
+T00211102330213302010
+>read13
+T0021110233021330201
+>read14
+T002111023302133020
+>read15
+T00211102330213302
+>read16
+T0021110233021330
+>read17
+T002111023302133
+>read18
+T00211102330213
+>read19
+T0021110233021
+>read20
+T002111023302
diff --git a/tests/cut/overlapb.fa b/tests/cut/overlapb.fa
new file mode 100644
index 0000000..decf1d3
--- /dev/null
+++ b/tests/cut/overlapb.fa
@@ -0,0 +1,38 @@
+>adaptlen18
+ATACTTACCCGTA
+>adaptlen17
+ATACTTACCCGTA
+>adaptlen16
+ATACTTACCCGTA
+>adaptlen15
+ATACTTACCCGTA
+>adaptlen14
+ATACTTACCCGTA
+>adaptlen13
+ATACTTACCCGTA
+>adaptlen12
+ATACTTACCCGTA
+>adaptlen11
+ATACTTACCCGTA
+>adaptlen10
+ATACTTACCCGTA
+>adaptlen9
+TCTCCGTCGATACTTACCCGTA
+>adaptlen8
+CTCCGTCGATACTTACCCGTA
+>adaptlen7
+TCCGTCGATACTTACCCGTA
+>adaptlen6
+CCGTCGATACTTACCCGTA
+>adaptlen5
+CGTCGATACTTACCCGTA
+>adaptlen4
+GTCGATACTTACCCGTA
+>adaptlen3
+TCGATACTTACCCGTA
+>adaptlen2
+CGATACTTACCCGTA
+>adaptlen1
+GATACTTACCCGTA
+>adaptlen0
+ATACTTACCCGTA
diff --git a/tests/cut/paired-filterboth.1.fastq b/tests/cut/paired-filterboth.1.fastq
new file mode 100644
index 0000000..a8b2b28
--- /dev/null
+++ b/tests/cut/paired-filterboth.1.fastq
@@ -0,0 +1,16 @@
+ at read1/1 some text
+TTATTTGTCTCCAGC
++
+##HHHHHHHHHHHHH
+ at read2/1
+CAACAGGCCACA
++
+HHHHHHHHHHHH
+ at read3/1
+CCAACTTGATATTAATAACA
++
+HHHHHHHHHHHHHHHHHHHH
+ at read4/1
+GACAGGCCGTTTGAATGTTGACGGGATGTT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
diff --git a/tests/cut/paired-filterboth.2.fastq b/tests/cut/paired-filterboth.2.fastq
new file mode 100644
index 0000000..655d545
--- /dev/null
+++ b/tests/cut/paired-filterboth.2.fastq
@@ -0,0 +1,16 @@
+ at read1/2 other text
+GCTGGAGACAAATAACAGT
++
+HHHHHHHHHHHHHHHHHHH
+ at read2/2
+TGTGGCCTGTTGCAGT
++
+###HHHHHHHHHHHHH
+ at read3/2
+TGTTATTAATATCAAGTTGGCAGTG
++
+#HHHHHHHHHHHHHHHHHHHHHHHH
+ at read4/2
+CATCCCGTCAACATTCAAACGGCCTGTCCA
++
+HH############################
diff --git a/tests/cut/paired-m27.1.fastq b/tests/cut/paired-m27.1.fastq
new file mode 100644
index 0000000..3f2d733
--- /dev/null
+++ b/tests/cut/paired-m27.1.fastq
@@ -0,0 +1,16 @@
+ at read1/1 some text
+TTATTTGTCTCCAGCTTAGACATATCGCCT
++
+##HHHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read2/1
+CAACAGGCCACATTAGACATATCGGATGGT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read3/1
+CCAACTTGATATTAATAACATTAGACA
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read4/1
+GACAGGCCGTTTGAATGTTGACGGGATGTT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
diff --git a/tests/cut/paired-m27.2.fastq b/tests/cut/paired-m27.2.fastq
new file mode 100644
index 0000000..808df31
--- /dev/null
+++ b/tests/cut/paired-m27.2.fastq
@@ -0,0 +1,16 @@
+ at read1/2 other text
+GCTGGAGACAAATAACAGTGGAGTAGTTTT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read2/2
+TGTGGCCTGTTGCAGTGGAGTAACTCCAGC
++
+###HHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read3/2
+TGTTATTAATATCAAGTTGGCAGTG
++
+#HHHHHHHHHHHHHHHHHHHHHHHH
+ at read4/2
+CATCCCGTCAACATTCAAACGGCCTGTCCA
++
+HH############################
diff --git a/tests/cut/paired-onlyA.1.fastq b/tests/cut/paired-onlyA.1.fastq
new file mode 100644
index 0000000..3f2d733
--- /dev/null
+++ b/tests/cut/paired-onlyA.1.fastq
@@ -0,0 +1,16 @@
+ at read1/1 some text
+TTATTTGTCTCCAGCTTAGACATATCGCCT
++
+##HHHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read2/1
+CAACAGGCCACATTAGACATATCGGATGGT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read3/1
+CCAACTTGATATTAATAACATTAGACA
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read4/1
+GACAGGCCGTTTGAATGTTGACGGGATGTT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
diff --git a/tests/cut/paired-onlyA.2.fastq b/tests/cut/paired-onlyA.2.fastq
new file mode 100644
index 0000000..15354e0
--- /dev/null
+++ b/tests/cut/paired-onlyA.2.fastq
@@ -0,0 +1,16 @@
+ at read1/2 other text
+GCTGGAGACAAATAA
++
+HHHHHHHHHHHHHHH
+ at read2/2
+TGTGGCCTGTTG
++
+###HHHHHHHHH
+ at read3/2
+TGTTATTAATATCAAGTTGG
++
+#HHHHHHHHHHHHHHHHHHH
+ at read4/2
+CATCCCGTCAACATTCAAACGGCCTGTCCA
++
+HH############################
diff --git a/tests/cut/paired-separate.1.fastq b/tests/cut/paired-separate.1.fastq
new file mode 100644
index 0000000..a8b2b28
--- /dev/null
+++ b/tests/cut/paired-separate.1.fastq
@@ -0,0 +1,16 @@
+ at read1/1 some text
+TTATTTGTCTCCAGC
++
+##HHHHHHHHHHHHH
+ at read2/1
+CAACAGGCCACA
++
+HHHHHHHHHHHH
+ at read3/1
+CCAACTTGATATTAATAACA
++
+HHHHHHHHHHHHHHHHHHHH
+ at read4/1
+GACAGGCCGTTTGAATGTTGACGGGATGTT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
diff --git a/tests/cut/paired-separate.2.fastq b/tests/cut/paired-separate.2.fastq
new file mode 100644
index 0000000..15354e0
--- /dev/null
+++ b/tests/cut/paired-separate.2.fastq
@@ -0,0 +1,16 @@
+ at read1/2 other text
+GCTGGAGACAAATAA
++
+HHHHHHHHHHHHHHH
+ at read2/2
+TGTGGCCTGTTG
++
+###HHHHHHHHH
+ at read3/2
+TGTTATTAATATCAAGTTGG
++
+#HHHHHHHHHHHHHHHHHHH
+ at read4/2
+CATCCCGTCAACATTCAAACGGCCTGTCCA
++
+HH############################
diff --git a/tests/cut/paired-too-short.1.fastq b/tests/cut/paired-too-short.1.fastq
new file mode 100644
index 0000000..64322e2
--- /dev/null
+++ b/tests/cut/paired-too-short.1.fastq
@@ -0,0 +1,4 @@
+ at read2/1
+CAACAGGCCACA
++
+HHHHHHHHHHHH
diff --git a/tests/cut/paired-too-short.2.fastq b/tests/cut/paired-too-short.2.fastq
new file mode 100644
index 0000000..96d2253
--- /dev/null
+++ b/tests/cut/paired-too-short.2.fastq
@@ -0,0 +1,4 @@
+ at read2/2
+TGTGGCCTGTTG
++
+###HHHHHHHHH
diff --git a/tests/cut/paired-trimmed.1.fastq b/tests/cut/paired-trimmed.1.fastq
new file mode 100644
index 0000000..fb3f459
--- /dev/null
+++ b/tests/cut/paired-trimmed.1.fastq
@@ -0,0 +1,12 @@
+ at read1/1 some text
+TTATTTGTCTCCAGC
++
+##HHHHHHHHHHHHH
+ at read2/1
+CAACAGGCCACA
++
+HHHHHHHHHHHH
+ at read3/1
+CCAACTTGATATTAATAACA
++
+HHHHHHHHHHHHHHHHHHHH
diff --git a/tests/cut/paired-trimmed.2.fastq b/tests/cut/paired-trimmed.2.fastq
new file mode 100644
index 0000000..1feef27
--- /dev/null
+++ b/tests/cut/paired-trimmed.2.fastq
@@ -0,0 +1,12 @@
+ at read1/2 other text
+GCTGGAGACAAATAACAGTGGAGTAGTTTT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read2/2
+TGTGGCCTGTTGCAGTGGAGTAACTCCAGC
++
+###HHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read3/2
+TGTTATTAATATCAAGTTGGCAGTG
++
+#HHHHHHHHHHHHHHHHHHHHHHHH
diff --git a/tests/cut/paired-untrimmed.1.fastq b/tests/cut/paired-untrimmed.1.fastq
new file mode 100644
index 0000000..8ab53bd
--- /dev/null
+++ b/tests/cut/paired-untrimmed.1.fastq
@@ -0,0 +1,4 @@
+ at read4/1
+GACAGGCCGTTTGAATGTTGACGGGATGTT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
diff --git a/tests/cut/paired-untrimmed.2.fastq b/tests/cut/paired-untrimmed.2.fastq
new file mode 100644
index 0000000..ca52d30
--- /dev/null
+++ b/tests/cut/paired-untrimmed.2.fastq
@@ -0,0 +1,4 @@
+ at read4/2
+CATCCCGTCAACATTCAAACGGCCTGTCCA
++
+HH############################
diff --git a/tests/cut/paired.1.fastq b/tests/cut/paired.1.fastq
new file mode 100644
index 0000000..d6f246d
--- /dev/null
+++ b/tests/cut/paired.1.fastq
@@ -0,0 +1,12 @@
+ at read1/1 some text
+TTATTTGTCTCCAGC
++
+##HHHHHHHHHHHHH
+ at read3/1
+CCAACTTGATATTAATAACA
++
+HHHHHHHHHHHHHHHHHHHH
+ at read4/1
+GACAGGCCGTTTGAATGTTGACGGGATGTT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
diff --git a/tests/cut/paired.2.fastq b/tests/cut/paired.2.fastq
new file mode 100644
index 0000000..eb4df03
--- /dev/null
+++ b/tests/cut/paired.2.fastq
@@ -0,0 +1,12 @@
+ at read1/2 other text
+GCTGGAGACAAATAA
++
+HHHHHHHHHHHHHHH
+ at read3/2
+TGTTATTAATATCAAGTTGG
++
+#HHHHHHHHHHHHHHHHHHH
+ at read4/2
+CATCCCGTCAACATTCAAACGGCCTGTCCA
++
+HH############################
diff --git a/tests/cut/paired.m14.1.fastq b/tests/cut/paired.m14.1.fastq
new file mode 100644
index 0000000..d6f246d
--- /dev/null
+++ b/tests/cut/paired.m14.1.fastq
@@ -0,0 +1,12 @@
+ at read1/1 some text
+TTATTTGTCTCCAGC
++
+##HHHHHHHHHHHHH
+ at read3/1
+CCAACTTGATATTAATAACA
++
+HHHHHHHHHHHHHHHHHHHH
+ at read4/1
+GACAGGCCGTTTGAATGTTGACGGGATGTT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
diff --git a/tests/cut/paired.m14.2.fastq b/tests/cut/paired.m14.2.fastq
new file mode 100644
index 0000000..3cb5248
--- /dev/null
+++ b/tests/cut/paired.m14.2.fastq
@@ -0,0 +1,12 @@
+ at read1/2 other text
+GCTGGAGACAAATAACAGTGGAGTAGTTTT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read3/2
+TGTTATTAATATCAAGTTGGCAGTG
++
+#HHHHHHHHHHHHHHHHHHHHHHHH
+ at read4/2
+CATCCCGTCAACATTCAAACGGCCTGTCCA
++
+HH############################
diff --git a/tests/cut/pairedq.1.fastq b/tests/cut/pairedq.1.fastq
new file mode 100644
index 0000000..e248176
--- /dev/null
+++ b/tests/cut/pairedq.1.fastq
@@ -0,0 +1,8 @@
+ at read1/1 some text
+TTATTTGTCTCCAGC
++
+##HHHHHHHHHHHHH
+ at read3/1
+CCAACTTGATATTAATAACA
++
+HHHHHHHHHHHHHHHHHHHH
diff --git a/tests/cut/pairedq.2.fastq b/tests/cut/pairedq.2.fastq
new file mode 100644
index 0000000..306314e
--- /dev/null
+++ b/tests/cut/pairedq.2.fastq
@@ -0,0 +1,8 @@
+ at read1/2 other text
+GCTGGAGACAAATAA
++
+HHHHHHHHHHHHHHH
+ at read3/2
+TGTTATTAATATCAAGTTGG
++
+#HHHHHHHHHHHHHHHHHHH
diff --git a/tests/cut/pairedu.1.fastq b/tests/cut/pairedu.1.fastq
new file mode 100644
index 0000000..7688970
--- /dev/null
+++ b/tests/cut/pairedu.1.fastq
@@ -0,0 +1,16 @@
+ at read1/1 some text
+TTTGTCTCCAGCTTAGACATATCGCC
++
+HHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read2/1
+CAGGCCACATTAGACATATCGGATGG
++
+HHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read3/1
+ACTTGATATTAATAACATTAGAC
++
+HHHHHHHHHHHHHHHHHHHHHHH
+ at read4/1
+AGGCCGTTTGAATGTTGACGGGATGT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHH
diff --git a/tests/cut/pairedu.2.fastq b/tests/cut/pairedu.2.fastq
new file mode 100644
index 0000000..dbd88d7
--- /dev/null
+++ b/tests/cut/pairedu.2.fastq
@@ -0,0 +1,16 @@
+ at read1/2 other text
+GAGACAAATAACAGTGGAGTAGTT
++
+HHHHHHHHHHHHHHHHHHHHHHHH
+ at read2/2
+GCCTGTTGCAGTGGAGTAACTCCA
++
+HHHHHHHHHHHHHHHHHHHHHHHH
+ at read3/2
+ATTAATATCAAGTTGGCAG
++
+HHHHHHHHHHHHHHHHHHH
+ at read4/2
+CCGTCAACATTCAAACGGCCTGTC
++
+########################
diff --git a/tests/cut/plus.fastq b/tests/cut/plus.fastq
new file mode 100644
index 0000000..35849f8
--- /dev/null
+++ b/tests/cut/plus.fastq
@@ -0,0 +1,8 @@
+ at first_sequence some other text
+SEQUENCE1
++first_sequence some other text
+:6;;8<=:<
+ at second_sequence and more text
+SEQUENCE2
++second_sequence and more text
+83<??:(61
diff --git a/tests/cut/polya.fasta b/tests/cut/polya.fasta
new file mode 100644
index 0000000..9b12d5c
--- /dev/null
+++ b/tests/cut/polya.fasta
@@ -0,0 +1,2 @@
+>polyAlong
+CTTAGTTCAATWTTAACCAAACTTCAGAACAG
diff --git a/tests/cut/rest.fa b/tests/cut/rest.fa
new file mode 100644
index 0000000..79c64cd
--- /dev/null
+++ b/tests/cut/rest.fa
@@ -0,0 +1,18 @@
+>read1
+TESTING
+>read2
+TESTING
+>read3
+TESTING
+>read4
+TESTING
+>read5
+TESTING
+>read6
+SOMETHING
+>read7
+SOMETHING
+>read8
+REST
+>read9
+NOREST
diff --git a/tests/cut/restfront.fa b/tests/cut/restfront.fa
new file mode 100644
index 0000000..8b51e6c
--- /dev/null
+++ b/tests/cut/restfront.fa
@@ -0,0 +1,18 @@
+>read1
+REST1
+>read2
+RESTING
+>read3
+
+>read4
+RESTLESS
+>read5
+RESTORE
+>read6
+SOMETHING
+>read7
+SOMETHING
+>read8
+SOMETHING
+>read9
+NOREST
diff --git a/tests/cut/s_1_sequence.txt b/tests/cut/s_1_sequence.txt
new file mode 100644
index 0000000..f728223
--- /dev/null
+++ b/tests/cut/s_1_sequence.txt
@@ -0,0 +1,8 @@
+ at first_sequence
+SEQUENCE1
++
+:6;;8<=:<
+ at second_sequence
+SEQUENCE2
++
+83<??:(61
diff --git a/tests/cut/small.fasta b/tests/cut/small.fasta
new file mode 100644
index 0000000..dde4ba1
--- /dev/null
+++ b/tests/cut/small.fasta
@@ -0,0 +1,6 @@
+>prefix:1_13_573/1
+CGTCCGAANTAGCTACCACCCTGA
+>prefix:1_13_1259/1
+AGCCGCTANGACGGGTTGGCCC
+>prefix:1_13_1440/1
+CAAGATCTNCCCTGCCACATTGCCCTAGTTAAAC
diff --git a/tests/cut/small.fastq b/tests/cut/small.fastq
new file mode 100644
index 0000000..a3437d1
--- /dev/null
+++ b/tests/cut/small.fastq
@@ -0,0 +1,12 @@
+ at prefix:1_13_573/1
+CGTCCGAANTAGCTACCACCCTGA
++
+)3%)&&&&!.1&(6:<'67..*,:
+ at prefix:1_13_1259/1
+AGCCGCTANGACGGGTTGGCCC
++
+;<:&:A;A!9<<<,7:<=3=;:
+ at prefix:1_13_1440/1
+CAAGATCTNCCCTGCCACATTGCCCTAGTTAAAC
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
diff --git a/tests/cut/small.trimmed.fastq b/tests/cut/small.trimmed.fastq
new file mode 100644
index 0000000..ecb1729
--- /dev/null
+++ b/tests/cut/small.trimmed.fastq
@@ -0,0 +1,8 @@
+ at prefix:1_13_573/1
+CGTCCGAANTAGCTACCACCCTGA
++
+)3%)&&&&!.1&(6:<'67..*,:
+ at prefix:1_13_1259/1
+AGCCGCTANGACGGGTTGGCCC
++
+;<:&:A;A!9<<<,7:<=3=;:
diff --git a/tests/cut/small.untrimmed.fastq b/tests/cut/small.untrimmed.fastq
new file mode 100644
index 0000000..d3668fd
--- /dev/null
+++ b/tests/cut/small.untrimmed.fastq
@@ -0,0 +1,4 @@
+ at prefix:1_13_1440/1
+CAAGATCTNCCCTGCCACATTGCCCTAGTTAAAC
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
diff --git a/tests/cut/solid-no-zerocap.fastq b/tests/cut/solid-no-zerocap.fastq
new file mode 100644
index 0000000..c666d5c
--- /dev/null
+++ b/tests/cut/solid-no-zerocap.fastq
@@ -0,0 +1,120 @@
+ at 1_13_85_F3
+T110020300.0113010210002110102330021
++
+7&9<&77)& <7))%4'657-1+9;9,.<8);.;8
+ at 1_13_573_F3
+T312311200.30213011011132
++
+6)3%)&&&& .1&(6:<'67..*,
+ at 1_13_1259_F3
+T002112130.201222332211
++
+=;<:&:A;A 9<<<,7:<=3=;
+ at 1_13_1440_F3
+T110020313.1113211010332111302330001
++
+=<=A:A=57 7<';<6?5;;6:+:=)71>70<,=:
+ at 1_14_177_F3
+T31330222020233321121323302013303311
++
+:8957;;54)'98924905;;)6:7;1:3<88(9:
+ at 1_14_238_F3
+T0133103120031002212223
++
+?><5=;<<<12>=<;1;;=5);
+ at 1_15_1098_F3
+T32333033222233020223032312232220332
++
+#,##(#5##*#($$'#.##)$&#%)$1##-$&##%
+ at 1_16_404_F3
+T03310320002130202331112
++
+78;:;;><>9=9;<<2=><<1;5
+ at 1_16_904_F3
+T21230102331022312232132021122111212
++
+9>=::6;;99=+/'$+#.#&%$&'(($1*$($.#.
+ at 1_16_1315_F3
+T032312311122103330103103
++
+<9<8A?>?::;6&,%;6/)8<<#/
+ at 1_16_1595_F3
+T22323211312111230022210011213302012
++
+>,<=<>@6<;?<=>:/=.>&;;8;)17:=&,>1=+
+ at 1_17_1379_F3
+T32011212111223230232132311321200123
++
+/-1179<1;>>8:':7-%/::0&+=<29,7<8(,2
+ at 1_18_1692_F3
+T12322233031100211233323300112200210
++
+.#(###5%)%2)',2&:+#+&5,($/1#&4&))$6
+ at 1_19_171_F3
+T10101101220213201111011320201230032
++
+)6:65/=3*:(8%)%2>&8&%;%0&#;$3$&:$#&
+ at 1_22_72_F3
+T13303032323221212301322233320210233
++
+3/#678<:.=9::6:(<538295;9+;&*;)+',&
+ at 1_22_1377_F3
+T22221333311222312201132312022322300
++
+)##0%.$.1*%,)95+%%14%$#8-###9-()#9+
+ at 1_23_585_F3
+T300103103101303121221
++
+>55;8><96/18?)<3<58<5
+ at 1_23_809_F3
+T13130101101021211013220302223302112
++
+:7<59@;<<5;/9;=<;7::.)&&&827(+221%(
+ at 1_24_138_F3
+T33211130100120323002
++
+6)68/;906#,25/&;<$0+
+ at 1_24_206_F3
+T33330332002223002020303331321221000
++
+))4(&)9592)#)694(,)292:(=7$.18,()65
+ at 1_25_143_F3
+T23202003031200220301303302012203132
++
+:4;/#&<9;&*;95-7;85&;587#16>%&,9<2&
+ at 1_25_1866_F3
+T03201321022131101112012330221130311
++
+=<>9;<@7?(=6,<&?=6=(=<641:?'<1=;':4
+ at 1_27_584_F3
+T10010330110103213112323303012103101
++
+82'('*.-8+%#2)(-&3.,.2,),+.':&,'(&/
+ at 1_27_1227_F3
+T02003022123001003201002031303302011
++
+492:;>A:<;34<<=);:<<;9=7<3::<::3=>'
+ at 1_27_1350_F3
+T13130101101021211013220222221301231
++
+95,)<(4./;<938=64=+2/,.4),3':97#33&
+ at 1_29_477_F3
+T13130101101021211013300302223003030
++
+94=55:75=+:/7><968;;#&+$#3&6,#1#4#'
+ at 1_30_882_F3
+T20102033000233
++
+2(+-:-3<;5##/;
+ at 1_31_221_F3
+T03301311201100030300100233220102031
++
+89>9>5<139/,&:7969972.274&%:78&&746
+ at 1_31_1313_F3
+T0133113130033012232100010101
++
+;3<7=7::)5*4=&;<7>4;795065;9
+ at 1_529_129_F3
+T132222301020322102101322221322302.3302.3.3..221..3
++
+>>%/((B6-&5A0:6)>;'1)B*38/?(5=%B+ &<-9 % @  )%)  (
diff --git a/tests/cut/solid.fasta b/tests/cut/solid.fasta
new file mode 100644
index 0000000..5428e58
--- /dev/null
+++ b/tests/cut/solid.fasta
@@ -0,0 +1,4 @@
+>problem1
+T0112021202222201123121023103020
+>problem2
+T20201030313112322220210
diff --git a/tests/cut/solid.fastq b/tests/cut/solid.fastq
new file mode 100644
index 0000000..ab2927a
--- /dev/null
+++ b/tests/cut/solid.fastq
@@ -0,0 +1,120 @@
+ at 1_13_85_F3
+T110020300.0113010210002110102330021
++
+7&9<&77)&!<7))%4'657-1+9;9,.<8);.;8
+ at 1_13_573_F3
+T312311200.30213011011132
++
+6)3%)&&&&!.1&(6:<'67..*,
+ at 1_13_1259_F3
+T002112130.201222332211
++
+=;<:&:A;A!9<<<,7:<=3=;
+ at 1_13_1440_F3
+T110020313.1113211010332111302330001
++
+=<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
+ at 1_14_177_F3
+T31330222020233321121323302013303311
++
+:8957;;54)'98924905;;)6:7;1:3<88(9:
+ at 1_14_238_F3
+T0133103120031002212223
++
+?><5=;<<<12>=<;1;;=5);
+ at 1_15_1098_F3
+T32333033222233020223032312232220332
++
+#,##(#5##*#($$'#.##)$&#%)$1##-$&##%
+ at 1_16_404_F3
+T03310320002130202331112
++
+78;:;;><>9=9;<<2=><<1;5
+ at 1_16_904_F3
+T21230102331022312232132021122111212
++
+9>=::6;;99=+/'$+#.#&%$&'(($1*$($.#.
+ at 1_16_1315_F3
+T032312311122103330103103
++
+<9<8A?>?::;6&,%;6/)8<<#/
+ at 1_16_1595_F3
+T22323211312111230022210011213302012
++
+>,<=<>@6<;?<=>:/=.>&;;8;)17:=&,>1=+
+ at 1_17_1379_F3
+T32011212111223230232132311321200123
++
+/-1179<1;>>8:':7-%/::0&+=<29,7<8(,2
+ at 1_18_1692_F3
+T12322233031100211233323300112200210
++
+.#(###5%)%2)',2&:+#+&5,($/1#&4&))$6
+ at 1_19_171_F3
+T10101101220213201111011320201230032
++
+)6:65/=3*:(8%)%2>&8&%;%0&#;$3$&:$#&
+ at 1_22_72_F3
+T13303032323221212301322233320210233
++
+3/#678<:.=9::6:(<538295;9+;&*;)+',&
+ at 1_22_1377_F3
+T22221333311222312201132312022322300
++
+)##0%.$.1*%,)95+%%14%$#8-###9-()#9+
+ at 1_23_585_F3
+T300103103101303121221
++
+>55;8><96/18?)<3<58<5
+ at 1_23_809_F3
+T13130101101021211013220302223302112
++
+:7<59@;<<5;/9;=<;7::.)&&&827(+221%(
+ at 1_24_138_F3
+T33211130100120323002
++
+6)68/;906#,25/&;<$0+
+ at 1_24_206_F3
+T33330332002223002020303331321221000
++
+))4(&)9592)#)694(,)292:(=7$.18,()65
+ at 1_25_143_F3
+T23202003031200220301303302012203132
++
+:4;/#&<9;&*;95-7;85&;587#16>%&,9<2&
+ at 1_25_1866_F3
+T03201321022131101112012330221130311
++
+=<>9;<@7?(=6,<&?=6=(=<641:?'<1=;':4
+ at 1_27_584_F3
+T10010330110103213112323303012103101
++
+82'('*.-8+%#2)(-&3.,.2,),+.':&,'(&/
+ at 1_27_1227_F3
+T02003022123001003201002031303302011
++
+492:;>A:<;34<<=);:<<;9=7<3::<::3=>'
+ at 1_27_1350_F3
+T13130101101021211013220222221301231
++
+95,)<(4./;<938=64=+2/,.4),3':97#33&
+ at 1_29_477_F3
+T13130101101021211013300302223003030
++
+94=55:75=+:/7><968;;#&+$#3&6,#1#4#'
+ at 1_30_882_F3
+T20102033000233
++
+2(+-:-3<;5##/;
+ at 1_31_221_F3
+T03301311201100030300100233220102031
++
+89>9>5<139/,&:7969972.274&%:78&&746
+ at 1_31_1313_F3
+T0133113130033012232100010101
++
+;3<7=7::)5*4=&;<7>4;795065;9
+ at 1_529_129_F3
+T132222301020322102101322221322302.3302.3.3..221..3
++
+>>%/((B6-&5A0:6)>;'1)B*38/?(5=%B+!&<-9!%!@!!)%)!!(
diff --git a/tests/cut/solid5p-anchored.fasta b/tests/cut/solid5p-anchored.fasta
new file mode 100644
index 0000000..a779451
--- /dev/null
+++ b/tests/cut/solid5p-anchored.fasta
@@ -0,0 +1,32 @@
+>read1
+212322332333012001112122203233202221000211
+>read2
+01212322332333200121311212133113001311002032
+>read3
+2201212322332333211133003002232323010012320300
+>read4
+02010102312033021011121312131
+>read5
+21313210102120020302022233110
+>read6
+31203203013323021010020301321
+>read7
+1301020302201212322332333203020130202120211322010013211
+>read8
+310321030130120302201212322332333232202123123111113113003200330
+>read9
+002132103320302201212322332333020123133023120320131020333011
+>read10
+0322031320033220302201212322332333201130233321321011303133231200
+>read11
+02010102312033021011121312131
+>read12
+1
+>read13
+
+>read14
+
+>read15
+
+>read16
+
diff --git a/tests/cut/solid5p-anchored.fastq b/tests/cut/solid5p-anchored.fastq
new file mode 100644
index 0000000..c1da73d
--- /dev/null
+++ b/tests/cut/solid5p-anchored.fastq
@@ -0,0 +1,64 @@
+ at read1
+212322332333012001112122203233202221000211
++
+58)2";%4A,8>0;9C\'?276>#)49"<,>?/\'!A4$.%+
+ at read2
+01212322332333200121311212133113001311002032
++
+4<@;(<3.37/''=:-9AA<&C2%$$;?A&5!C69:?-;&;65.
+ at read3
+2201212322332333211133003002232323010012320300
++
+!<A-BB&A/)'103&2$!00>#97*B.0A-@(*","B3><4&16(:
+ at read4
+02010102312033021011121312131
++
+&-81+%)7;<)6?83!&CB9"9B6307=&
+ at read5
+21313210102120020302022233110
++
+9)27,(-*=,#4:;"/4++5<, at -784*'
+ at read6
+31203203013323021010020301321
++
+!.;:C%97@>75-";';*)A67CCC")$*
+ at read7
+1301020302201212322332333203020130202120211322010013211
++
+;0B at A"98!<=!*;5;650;';79!+8,4(2=+98:B at C@:+3*>2+6+2++C0.
+ at read8
+310321030130120302201212322332333232202123123111113113003200330
++
+/$-"=6+1.8?AB!?'#.585 at 6:47@?>.315A-'9<%">6,+)*,)1-;:(691>?C)4A;
+ at read9
+002132103320302201212322332333020123133023120320131020333011
++
+&?527&:=;6 at 6@03%95(-0#$:B8::B*4?@&)6>79C>)6C'5-#<!B:>0:A8+2*
+ at read10
+0322031320033220302201212322332333201130233321321011303133231200
++
+53)>2.+9?7%=&21;8!820961%3#0'5C.28347,2(55*1.,>%:(1A'A5=@7&&5?4'
+ at read11
+02010102312033021011121312131
++
+8B"195'@,@&:5=7;!&-9:%<!)>((>
+ at read12
+1
++
+C
+ at read13
+
++
+
+ at read14
+
++
+
+ at read15
+
++
+
+ at read16
+
++
+
diff --git a/tests/cut/solid5p-anchored.notrim.fasta b/tests/cut/solid5p-anchored.notrim.fasta
new file mode 100644
index 0000000..bdfe76d
--- /dev/null
+++ b/tests/cut/solid5p-anchored.notrim.fasta
@@ -0,0 +1,32 @@
+>read1
+T1212322332333012001112122203233202221000211
+>read2
+T201212322332333200121311212133113001311002032
+>read3
+T02201212322332333211133003002232323010012320300
+>read4
+T302010102312033021011121312131
+>read5
+T121313210102120020302022233110
+>read6
+T331203203013323021010020301321
+>read7
+T21301020302201212322332333203020130202120211322010013211
+>read8
+T2310321030130120302201212322332333232202123123111113113003200330
+>read9
+T0002132103320302201212322332333020123133023120320131020333011
+>read10
+T00322031320033220302201212322332333201130233321321011303133231200
+>read11
+T402010102312033021011121312131
+>read12
+T11
+>read13
+T1
+>read14
+T
+>read15
+T
+>read16
+T
diff --git a/tests/cut/solid5p-anchored.notrim.fastq b/tests/cut/solid5p-anchored.notrim.fastq
new file mode 100644
index 0000000..946aa9c
--- /dev/null
+++ b/tests/cut/solid5p-anchored.notrim.fastq
@@ -0,0 +1,64 @@
+ at read1
+T1212322332333012001112122203233202221000211
++
+:58)2";%4A,8>0;9C\'?276>#)49"<,>?/\'!A4$.%+
+ at read2
+T201212322332333200121311212133113001311002032
++
+44<@;(<3.37/''=:-9AA<&C2%$$;?A&5!C69:?-;&;65.
+ at read3
+T02201212322332333211133003002232323010012320300
++
+2!<A-BB&A/)'103&2$!00>#97*B.0A-@(*","B3><4&16(:
+ at read4
+T302010102312033021011121312131
++
+<&-81+%)7;<)6?83!&CB9"9B6307=&
+ at read5
+T121313210102120020302022233110
++
+$9)27,(-*=,#4:;"/4++5<, at -784*'
+ at read6
+T331203203013323021010020301321
++
+4!.;:C%97@>75-";';*)A67CCC")$*
+ at read7
+T21301020302201212322332333203020130202120211322010013211
++
+,;0B at A"98!<=!*;5;650;';79!+8,4(2=+98:B at C@:+3*>2+6+2++C0.
+ at read8
+T2310321030130120302201212322332333232202123123111113113003200330
++
+C/$-"=6+1.8?AB!?'#.585 at 6:47@?>.315A-'9<%">6,+)*,)1-;:(691>?C)4A;
+ at read9
+T0002132103320302201212322332333020123133023120320131020333011
++
+(&?527&:=;6 at 6@03%95(-0#$:B8::B*4?@&)6>79C>)6C'5-#<!B:>0:A8+2*
+ at read10
+T00322031320033220302201212322332333201130233321321011303133231200
++
+&53)>2.+9?7%=&21;8!820961%3#0'5C.28347,2(55*1.,>%:(1A'A5=@7&&5?4'
+ at read11
+T402010102312033021011121312131
++
+&8B"195'@,@&:5=7;!&-9:%<!)>((>
+ at read12
+T11
++
+?C
+ at read13
+T1
++
+C
+ at read14
+T
++
+
+ at read15
+T
++
+
+ at read16
+T
++
+
diff --git a/tests/cut/solid5p.fasta b/tests/cut/solid5p.fasta
new file mode 100644
index 0000000..29c26a6
--- /dev/null
+++ b/tests/cut/solid5p.fasta
@@ -0,0 +1,32 @@
+>read1
+12001112122203233202221000211
+>read2
+00121311212133113001311002032
+>read3
+11133003002232323010012320300
+>read4
+02010102312033021011121312131
+>read5
+21313210102120020302022233110
+>read6
+31203203013323021010020301321
+>read7
+03020130202120211322010013211
+>read8
+32202123123111113113003200330
+>read9
+20123133023120320131020333011
+>read10
+01130233321321011303133231200
+>read11
+02010102312033021011121312131
+>read12
+1
+>read13
+
+>read14
+
+>read15
+
+>read16
+
diff --git a/tests/cut/solid5p.fastq b/tests/cut/solid5p.fastq
new file mode 100644
index 0000000..5849d87
--- /dev/null
+++ b/tests/cut/solid5p.fastq
@@ -0,0 +1,64 @@
+ at read1
+12001112122203233202221000211
++
+;9C\'?276>#)49"<,>?/\'!A4$.%+
+ at read2
+00121311212133113001311002032
++
+-9AA<&C2%$$;?A&5!C69:?-;&;65.
+ at read3
+11133003002232323010012320300
++
+!00>#97*B.0A-@(*","B3><4&16(:
+ at read4
+02010102312033021011121312131
++
+&-81+%)7;<)6?83!&CB9"9B6307=&
+ at read5
+21313210102120020302022233110
++
+9)27,(-*=,#4:;"/4++5<, at -784*'
+ at read6
+31203203013323021010020301321
++
+!.;:C%97@>75-";';*)A67CCC")$*
+ at read7
+03020130202120211322010013211
++
+8,4(2=+98:B at C@:+3*>2+6+2++C0.
+ at read8
+32202123123111113113003200330
++
+-'9<%">6,+)*,)1-;:(691>?C)4A;
+ at read9
+20123133023120320131020333011
++
+?@&)6>79C>)6C'5-#<!B:>0:A8+2*
+ at read10
+01130233321321011303133231200
++
+47,2(55*1.,>%:(1A'A5=@7&&5?4'
+ at read11
+02010102312033021011121312131
++
+8B"195'@,@&:5=7;!&-9:%<!)>((>
+ at read12
+1
++
+C
+ at read13
+
++
+
+ at read14
+
++
+
+ at read15
+
++
+
+ at read16
+
++
+
diff --git a/tests/cut/solidbfast.fastq b/tests/cut/solidbfast.fastq
new file mode 100644
index 0000000..c9117c4
--- /dev/null
+++ b/tests/cut/solidbfast.fastq
@@ -0,0 +1,120 @@
+ at abc:1_13_85
+T110020300.0113010210002110102330021
++
+7&9<&77)&!<7))%4'657-1+9;9,.<8);.;8
+ at abc:1_13_573
+T312311200.30213011011132
++
+6)3%)&&&&!.1&(6:<'67..*,
+ at abc:1_13_1259
+T002112130.201222332211
++
+=;<:&:A;A!9<<<,7:<=3=;
+ at abc:1_13_1440
+T110020313.1113211010332111302330001
++
+=<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
+ at abc:1_14_177
+T31330222020233321121323302013303311
++
+:8957;;54)'98924905;;)6:7;1:3<88(9:
+ at abc:1_14_238
+T0133103120031002212223
++
+?><5=;<<<12>=<;1;;=5);
+ at abc:1_15_1098
+T32333033222233020223032312232220332
++
+#,##(#5##*#($$'#.##)$&#%)$1##-$&##%
+ at abc:1_16_404
+T03310320002130202331112
++
+78;:;;><>9=9;<<2=><<1;5
+ at abc:1_16_904
+T21230102331022312232132021122111212
++
+9>=::6;;99=+/'$+#.#&%$&'(($1*$($.#.
+ at abc:1_16_1315
+T032312311122103330103103
++
+<9<8A?>?::;6&,%;6/)8<<#/
+ at abc:1_16_1595
+T22323211312111230022210011213302012
++
+>,<=<>@6<;?<=>:/=.>&;;8;)17:=&,>1=+
+ at abc:1_17_1379
+T32011212111223230232132311321200123
++
+/-1179<1;>>8:':7-%/::0&+=<29,7<8(,2
+ at abc:1_18_1692
+T12322233031100211233323300112200210
++
+.#(###5%)%2)',2&:+#+&5,($/1#&4&))$6
+ at abc:1_19_171
+T10101101220213201111011320201230032
++
+)6:65/=3*:(8%)%2>&8&%;%0&#;$3$&:$#&
+ at abc:1_22_72
+T13303032323221212301322233320210233
++
+3/#678<:.=9::6:(<538295;9+;&*;)+',&
+ at abc:1_22_1377
+T22221333311222312201132312022322300
++
+)##0%.$.1*%,)95+%%14%$#8-###9-()#9+
+ at abc:1_23_585
+T300103103101303121221
++
+>55;8><96/18?)<3<58<5
+ at abc:1_23_809
+T13130101101021211013220302223302112
++
+:7<59@;<<5;/9;=<;7::.)&&&827(+221%(
+ at abc:1_24_138
+T33211130100120323002
++
+6)68/;906#,25/&;<$0+
+ at abc:1_24_206
+T33330332002223002020303331321221000
++
+))4(&)9592)#)694(,)292:(=7$.18,()65
+ at abc:1_25_143
+T23202003031200220301303302012203132
++
+:4;/#&<9;&*;95-7;85&;587#16>%&,9<2&
+ at abc:1_25_1866
+T03201321022131101112012330221130311
++
+=<>9;<@7?(=6,<&?=6=(=<641:?'<1=;':4
+ at abc:1_27_584
+T10010330110103213112323303012103101
++
+82'('*.-8+%#2)(-&3.,.2,),+.':&,'(&/
+ at abc:1_27_1227
+T02003022123001003201002031303302011
++
+492:;>A:<;34<<=);:<<;9=7<3::<::3=>'
+ at abc:1_27_1350
+T13130101101021211013220222221301231
++
+95,)<(4./;<938=64=+2/,.4),3':97#33&
+ at abc:1_29_477
+T13130101101021211013300302223003030
++
+94=55:75=+:/7><968;;#&+$#3&6,#1#4#'
+ at abc:1_30_882
+T20102033000233
++
+2(+-:-3<;5##/;
+ at abc:1_31_221
+T03301311201100030300100233220102031
++
+89>9>5<139/,&:7969972.274&%:78&&746
+ at abc:1_31_1313
+T0133113130033012232100010101
++
+;3<7=7::)5*4=&;<7>4;795065;9
+ at abc:1_529_129
+T132222301020322102101322221322302.3302.3.3..221..3
++
+>>%/((B6-&5A0:6)>;'1)B*38/?(5=%B+!&<-9!%!@!!)%)!!(
diff --git a/tests/cut/solidmaq.fastq b/tests/cut/solidmaq.fastq
new file mode 100644
index 0000000..195be3d
--- /dev/null
+++ b/tests/cut/solidmaq.fastq
@@ -0,0 +1,120 @@
+ at 552:1_13_85/1
+CAAGATAANACCTACAGCAAAGCCACAGTTAAGC
++
+&9<&77)&!<7))%4'657-1+9;9,.<8);.;8
+ at 552:1_13_573/1
+CGTCCGAANTAGCTACCACCCTG
++
+)3%)&&&&!.1&(6:<'67..*,
+ at 552:1_13_1259/1
+AGCCGCTANGACGGGTTGGCC
++
+;<:&:A;A!9<<<,7:<=3=;
+ at 552:1_13_1440/1
+CAAGATCTNCCCTGCCACATTGCCCTAGTTAAAC
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
+ at 552:1_14_177/1
+CTTAGGGAGAGTTTGCCGCTGTTAGACTTATTCC
++
+8957;;54)'98924905;;)6:7;1:3<88(9:
+ at 552:1_14_238/1
+CTTCATCGAATCAAGGCGGGT
++
+><5=;<<<12>=<;1;;=5);
+ at 552:1_15_1098/1
+GTTTATTGGGGTTAGAGGTATGTCGGTGGGATTG
++
+,##(#5##*#($$'#.##)$&#%)$1##-$&##%
+ at 552:1_16_404/1
+TTCATGAAAGCTAGAGTTCCCG
++
+8;:;;><>9=9;<<2=><<1;5
+ at 552:1_16_904/1
+CGTACAGTTCAGGTCGGTGCTGAGCCGGCCCGCG
++
+>=::6;;99=+/'$+#.#&%$&'(($1*$($.#.
+ at 552:1_16_1315/1
+TGTCGTCCCGGCATTTACATCAT
++
+9<8A?>?::;6&,%;6/)8<<#/
+ at 552:1_16_1595/1
+GTGTGCCTCGCCCGTAAGGGCAACCGCTTAGACG
++
+,<=<>@6<;?<=>:/=.>&;;8;)17:=&,>1=+
+ at 552:1_17_1379/1
+GACCGCGCCCGGTGTAGTGCTGTCCTGCGAACGT
++
+-1179<1;>>8:':7-%/::0&+=<29,7<8(,2
+ at 552:1_18_1692/1
+GTGGGTTATCCAAGCCGTTTGTTAACCGGAAGCA
++
+#(###5%)%2)',2&:+#+&5,($/1#&4&))$6
+ at 552:1_19_171/1
+ACACCACGGAGCTGACCCCACCTGAGACGTAATG
++
+6:65/=3*:(8%)%2>&8&%;%0&#;$3$&:$#&
+ at 552:1_22_72/1
+TTATATGTGTGGCGCGTACTGGGTTTGAGCAGTT
++
+/#678<:.=9::6:(<538295;9+;&*;)+',&
+ at 552:1_22_1377/1
+GGGCTTTTCCGGGTCGGACCTGTCGAGGTGGTAA
++
+##0%.$.1*%,)95+%%14%$#8-###9-()#9+
+ at 552:1_23_585/1
+AACATCATCACTATCGCGGC
++
+55;8><96/18?)<3<58<5
+ at 552:1_23_809/1
+TCTACACCACAGCGCCACTGGATAGGGTTAGCCG
++
+7<59@;<<5;/9;=<;7::.)&&&827(+221%(
+ at 552:1_24_138/1
+TGCCCTACAACGATGTAAG
++
+)68/;906#,25/&;<$0+
+ at 552:1_24_206/1
+TTTATTGAAGGGTAAGAGATATTTCTGCGGCAAA
++
+)4(&)9592)#)694(,)292:(=7$.18,()65
+ at 552:1_25_143/1
+TGAGAATATCGAAGGATACTATTAGACGGATCTG
++
+4;/#&<9;&*;95-7;85&;587#16>%&,9<2&
+ at 552:1_25_1866/1
+TGACTGCAGGCTCCACCCGACGTTAGGCCTATCC
++
+<>9;<@7?(=6,<&?=6=(=<641:?'<1=;':4
+ at 552:1_27_584/1
+AACATTACCACATGCTCCGTGTTATACGCATCAC
++
+2'('*.-8+%#2)(-&3.,.2,),+.':&,'(&/
+ at 552:1_27_1227/1
+GAATAGGCGTAACAATGACAAGATCTATTAGACC
++
+92:;>A:<;34<<=);:<<;9=7<3::<::3=>'
+ at 552:1_27_1350/1
+TCTACACCACAGCGCCACTGGAGGGGGCTACGTC
++
+5,)<(4./;<938=64=+2/,.4),3':97#33&
+ at 552:1_29_477/1
+TCTACACCACAGCGCCACTTAATAGGGTAATATA
++
+4=55:75=+:/7><968;;#&+$#3&6,#1#4#'
+ at 552:1_30_882/1
+ACAGATTAAAGTT
++
+(+-:-3<;5##/;
+ at 552:1_31_221/1
+TTACTCCGACCAAATATAACAAGTTGGACAGATC
++
+9>9>5<139/,&:7969972.274&%:78&&746
+ at 552:1_31_1313/1
+CTTCCTCTAATTACGGTGCAAACACAC
++
+3<7=7::)5*4=&;<7>4;795065;9
+ at 552:1_529_129/1
+TGGGGTACAGATGGCAGCACTGGGGCTGGTAGNTTAGNTNTNNGGCNNT
++
+>%/((B6-&5A0:6)>;'1)B*38/?(5=%B+!&<-9!%!@!!)%)!!(
diff --git a/tests/cut/solidqual.fastq b/tests/cut/solidqual.fastq
new file mode 100644
index 0000000..80f4714
--- /dev/null
+++ b/tests/cut/solidqual.fastq
@@ -0,0 +1,120 @@
+ at 1_13_85_F3
+T110020300.0113010210002110102330021
++
+7&9<&77)&!<7))%4'657-1+9;9,.<8);.;8
+ at 1_13_573_F3
+T312311200.3021301101113203302010003
++
+6)3%)&&&&!.1&(6:<'67..*,:75)'77&&&5
+ at 1_13_1259_F3
+T002112130.201222332211133020123031
++
+=;<:&:A;A!9<<<,7:<=3=;:<&<?<?8<;=<
+ at 1_13_1440_F3
+T110020313.1113211010332111302330001
++
+=<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
+ at 1_14_177_F3
+T31330222020233321121323302013303311
++
+:8957;;54)'98924905;;)6:7;1:3<88(9:
+ at 1_14_238_F3
+T01331031200310022122230330201030313
++
+?><5=;<<<12>=<;1;;=5);.;14:0>2;:3;7
+ at 1_15_1098_F3
+T
++
+
+ at 1_16_404_F3
+T03310320002130202331112133020103031
++
+78;:;;><>9=9;<<2=><<1;58;9<<;>(<;<;
+ at 1_16_904_F3
+T21230102331022312232132021122111212
++
+9>=::6;;99=+/'$+#.#&%$&'(($1*$($.#.
+ at 1_16_1315_F3
+T0323123111221033301031032330201000
++
+<9<8A?>?::;6&,%;6/)8<<#/;79(448&*.
+ at 1_16_1595_F3
+T22323211312111230022210011213302012
++
+>,<=<>@6<;?<=>:/=.>&;;8;)17:=&,>1=+
+ at 1_17_1379_F3
+T32011212111223230232132311321200123
++
+/-1179<1;>>8:':7-%/::0&+=<29,7<8(,2
+ at 1_18_1692_F3
+T12322233031100211233323300112200210
++
+.#(###5%)%2)',2&:+#+&5,($/1#&4&))$6
+ at 1_19_171_F3
+T10101101220213201111011320201230
++
+)6:65/=3*:(8%)%2>&8&%;%0&#;$3$&:
+ at 1_22_72_F3
+T133030323232212123013222333202
++
+3/#678<:.=9::6:(<538295;9+;&*;
+ at 1_22_1377_F3
+T22221333311222312201132312022322300
++
+)##0%.$.1*%,)95+%%14%$#8-###9-()#9+
+ at 1_23_585_F3
+T30010310310130312122123302013303131
++
+>55;8><96/18?)<3<58<5:;96=7:1=8=:-<
+ at 1_23_809_F3
+T131301011010212110132203022233021
++
+:7<59@;<<5;/9;=<;7::.)&&&827(+221
+ at 1_24_138_F3
+T3321113010012032300203302012303131
++
+6)68/;906#,25/&;<$0+250#2,<)5,9/+7
+ at 1_24_206_F3
+T33330332002223002020303331321221000
++
+))4(&)9592)#)694(,)292:(=7$.18,()65
+ at 1_25_143_F3
+T2320200303120022030130330201220313
++
+:4;/#&<9;&*;95-7;85&;587#16>%&,9<2
+ at 1_25_1866_F3
+T03201321022131101112012330221130311
++
+=<>9;<@7?(=6,<&?=6=(=<641:?'<1=;':4
+ at 1_27_584_F3
+T10010330110103213112323303012103101
++
+82'('*.-8+%#2)(-&3.,.2,),+.':&,'(&/
+ at 1_27_1227_F3
+T0200302212300100320100203130330201
++
+492:;>A:<;34<<=);:<<;9=7<3::<::3=>
+ at 1_27_1350_F3
+T1313010110102121101322022222130123
++
+95,)<(4./;<938=64=+2/,.4),3':97#33
+ at 1_29_477_F3
+T13130101101021211013300302223
++
+94=55:75=+:/7><968;;#&+$#3&6,
+ at 1_30_882_F3
+T20102033000233133320103031311233200
++
+2(+-:-3<;5##/;:(%&84'#:,?3&&8>-();5
+ at 1_31_221_F3
+T03301311201100030300100233220102031
++
+89>9>5<139/,&:7969972.274&%:78&&746
+ at 1_31_1313_F3
+T01331131300330122321000101010330201
++
+;3<7=7::)5*4=&;<7>4;795065;9';896'=
+ at 1_529_129_F3
+T132222301020322102101322221322302.3302
++
+>>%/((B6-&5A0:6)>;'1)B*38/?(5=%B+!&<-9
diff --git a/tests/cut/sra.fastq b/tests/cut/sra.fastq
new file mode 100644
index 0000000..ea95638
--- /dev/null
+++ b/tests/cut/sra.fastq
@@ -0,0 +1,24 @@
+ at 1_13_85_F3
+T110020300.0113010210002110102330021
++
+7&9<&77)&!<7))%4'657-1+9;9,.<8);.;8
+ at 1_13_573_F3
+T312311200.30213011011132
++
+6)3%)&&&&!.1&(6:<'67..*,
+ at 1_13_1259_F3
+T002112130.201222332211
++
+=;<:&:A;A!9<<<,7:<=3=;
+ at 1_13_1440_F3
+T110020313.1113211010332111302330001
++
+=<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
+ at 1_14_177_F3
+T31330222020233321121323302013303311
++
+:8957;;54)'98924905;;)6:7;1:3<88(9:
+ at 1_14_238_F3
+T0133103120031002212223
++
+?><5=;<<<12>=<;1;;=5);
diff --git a/tests/cut/stripped.fasta b/tests/cut/stripped.fasta
new file mode 100644
index 0000000..2ca63a2
--- /dev/null
+++ b/tests/cut/stripped.fasta
@@ -0,0 +1,4 @@
+>first
+SEQUENCE1
+>second
+SEQUENCE2
diff --git a/tests/cut/suffix.fastq b/tests/cut/suffix.fastq
new file mode 100644
index 0000000..72392e0
--- /dev/null
+++ b/tests/cut/suffix.fastq
@@ -0,0 +1,120 @@
+ at 1_13_85_my_suffix_no_adapter
+T110020300.0113010210002110102330021
++
+7&9<&77)&!<7))%4'657-1+9;9,.<8);.;8
+ at 1_13_573_my_suffix_1
+T312311200.30213011011132
++
+6)3%)&&&&!.1&(6:<'67..*,
+ at 1_13_1259_my_suffix_1
+T002112130.201222332211
++
+=;<:&:A;A!9<<<,7:<=3=;
+ at 1_13_1440_my_suffix_no_adapter
+T110020313.1113211010332111302330001
++
+=<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
+ at 1_14_177_my_suffix_no_adapter
+T31330222020233321121323302013303311
++
+:8957;;54)'98924905;;)6:7;1:3<88(9:
+ at 1_14_238_my_suffix_1
+T0133103120031002212223
++
+?><5=;<<<12>=<;1;;=5);
+ at 1_15_1098_my_suffix_no_adapter
+T32333033222233020223032312232220332
++
+#,##(#5##*#($$'#.##)$&#%)$1##-$&##%
+ at 1_16_404_my_suffix_1
+T03310320002130202331112
++
+78;:;;><>9=9;<<2=><<1;5
+ at 1_16_904_my_suffix_no_adapter
+T21230102331022312232132021122111212
++
+9>=::6;;99=+/'$+#.#&%$&'(($1*$($.#.
+ at 1_16_1315_my_suffix_1
+T032312311122103330103103
++
+<9<8A?>?::;6&,%;6/)8<<#/
+ at 1_16_1595_my_suffix_no_adapter
+T22323211312111230022210011213302012
++
+>,<=<>@6<;?<=>:/=.>&;;8;)17:=&,>1=+
+ at 1_17_1379_my_suffix_no_adapter
+T32011212111223230232132311321200123
++
+/-1179<1;>>8:':7-%/::0&+=<29,7<8(,2
+ at 1_18_1692_my_suffix_no_adapter
+T12322233031100211233323300112200210
++
+.#(###5%)%2)',2&:+#+&5,($/1#&4&))$6
+ at 1_19_171_my_suffix_no_adapter
+T10101101220213201111011320201230032
++
+)6:65/=3*:(8%)%2>&8&%;%0&#;$3$&:$#&
+ at 1_22_72_my_suffix_no_adapter
+T13303032323221212301322233320210233
++
+3/#678<:.=9::6:(<538295;9+;&*;)+',&
+ at 1_22_1377_my_suffix_no_adapter
+T22221333311222312201132312022322300
++
+)##0%.$.1*%,)95+%%14%$#8-###9-()#9+
+ at 1_23_585_my_suffix_1
+T300103103101303121221
++
+>55;8><96/18?)<3<58<5
+ at 1_23_809_my_suffix_no_adapter
+T13130101101021211013220302223302112
++
+:7<59@;<<5;/9;=<;7::.)&&&827(+221%(
+ at 1_24_138_my_suffix_1
+T33211130100120323002
++
+6)68/;906#,25/&;<$0+
+ at 1_24_206_my_suffix_no_adapter
+T33330332002223002020303331321221000
++
+))4(&)9592)#)694(,)292:(=7$.18,()65
+ at 1_25_143_my_suffix_no_adapter
+T23202003031200220301303302012203132
++
+:4;/#&<9;&*;95-7;85&;587#16>%&,9<2&
+ at 1_25_1866_my_suffix_no_adapter
+T03201321022131101112012330221130311
++
+=<>9;<@7?(=6,<&?=6=(=<641:?'<1=;':4
+ at 1_27_584_my_suffix_no_adapter
+T10010330110103213112323303012103101
++
+82'('*.-8+%#2)(-&3.,.2,),+.':&,'(&/
+ at 1_27_1227_my_suffix_no_adapter
+T02003022123001003201002031303302011
++
+492:;>A:<;34<<=);:<<;9=7<3::<::3=>'
+ at 1_27_1350_my_suffix_no_adapter
+T13130101101021211013220222221301231
++
+95,)<(4./;<938=64=+2/,.4),3':97#33&
+ at 1_29_477_my_suffix_no_adapter
+T13130101101021211013300302223003030
++
+94=55:75=+:/7><968;;#&+$#3&6,#1#4#'
+ at 1_30_882_my_suffix_1
+T20102033000233
++
+2(+-:-3<;5##/;
+ at 1_31_221_my_suffix_no_adapter
+T03301311201100030300100233220102031
++
+89>9>5<139/,&:7969972.274&%:78&&746
+ at 1_31_1313_my_suffix_1
+T0133113130033012232100010101
++
+;3<7=7::)5*4=&;<7>4;795065;9
+ at 1_529_129_my_suffix_no_adapter
+T132222301020322102101322221322302.3302.3.3..221..3
++
+>>%/((B6-&5A0:6)>;'1)B*38/?(5=%B+!&<-9!%!@!!)%)!!(
diff --git a/tests/cut/trimN3.fasta b/tests/cut/trimN3.fasta
new file mode 100644
index 0000000..c05f5ed
--- /dev/null
+++ b/tests/cut/trimN3.fasta
@@ -0,0 +1,2 @@
+>read1
+CAGTCGGTCCTGAGAGATGGGCGAGCGCTGG
diff --git a/tests/cut/trimN5.fasta b/tests/cut/trimN5.fasta
new file mode 100644
index 0000000..b1faa5f
--- /dev/null
+++ b/tests/cut/trimN5.fasta
@@ -0,0 +1,2 @@
+>read1
+GGCCTGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCAG
diff --git a/tests/cut/twoadapters.fasta b/tests/cut/twoadapters.fasta
new file mode 100644
index 0000000..c03a129
--- /dev/null
+++ b/tests/cut/twoadapters.fasta
@@ -0,0 +1,6 @@
+>read1
+GATCCTCCTGGAGCTGGCTGATACCAGTATACCAGTGCTGATTGTTG
+>read2
+CTCGAGAATTCTGGATCCTCTCTTCTGCTACCTTTGGGATTTGCTTGCTCTTG
+>read3 (no adapter)
+AATGAAGGTTGTAACCATAACAGGAAGTCATGCGCATTTAGTCGAGCACGTAAGTTCATACGGAAATGGGTAAG
diff --git a/tests/cut/twoadapters.first.fasta b/tests/cut/twoadapters.first.fasta
new file mode 100644
index 0000000..aab7419
--- /dev/null
+++ b/tests/cut/twoadapters.first.fasta
@@ -0,0 +1,2 @@
+>read1
+GATCCTCCTGGAGCTGGCTGATACCAGTATACCAGTGCTGATTGTTG
diff --git a/tests/cut/twoadapters.second.fasta b/tests/cut/twoadapters.second.fasta
new file mode 100644
index 0000000..2c491d3
--- /dev/null
+++ b/tests/cut/twoadapters.second.fasta
@@ -0,0 +1,2 @@
+>read2
+CTCGAGAATTCTGGATCCTCTCTTCTGCTACCTTTGGGATTTGCTTGCTCTTG
diff --git a/tests/cut/twoadapters.unknown.fasta b/tests/cut/twoadapters.unknown.fasta
new file mode 100644
index 0000000..88f7875
--- /dev/null
+++ b/tests/cut/twoadapters.unknown.fasta
@@ -0,0 +1,2 @@
+>read3 (no adapter)
+AATGAAGGTTGTAACCATAACAGGAAGTCATGCGCATTTAGTCGAGCACGTAAGTTCATACGGAAATGGGTAAG
diff --git a/tests/cut/unconditional-back.fastq b/tests/cut/unconditional-back.fastq
new file mode 100644
index 0000000..d03f33e
--- /dev/null
+++ b/tests/cut/unconditional-back.fastq
@@ -0,0 +1,12 @@
+ at prefix:1_13_573/1
+CGTCCGAANTAGCTACCACCCTGATTAGA
++
+)3%)&&&&!.1&(6:<'67..*,:75)'7
+ at prefix:1_13_1259/1
+AGCCGCTANGACGGGTTGGCCCTTAGACG
++
+;<:&:A;A!9<<<,7:<=3=;:<&<?<?8
+ at prefix:1_13_1440/1
+CAAGATCTNCCCTGCCACATTGCCCTAGT
++
+<=A:A=57!7<';<6?5;;6:+:=)71>7
diff --git a/tests/cut/unconditional-both.fastq b/tests/cut/unconditional-both.fastq
new file mode 100644
index 0000000..303b042
--- /dev/null
+++ b/tests/cut/unconditional-both.fastq
@@ -0,0 +1,12 @@
+ at prefix:1_13_573/1
+GAANTAGCTACCACCCTGATTAGA
++
+&&&!.1&(6:<'67..*,:75)'7
+ at prefix:1_13_1259/1
+CTANGACGGGTTGGCCCTTAGACG
++
+A;A!9<<<,7:<=3=;:<&<?<?8
+ at prefix:1_13_1440/1
+TCTNCCCTGCCACATTGCCCTAGT
++
+=57!7<';<6?5;;6:+:=)71>7
diff --git a/tests/cut/unconditional-front.fastq b/tests/cut/unconditional-front.fastq
new file mode 100644
index 0000000..383b3db
--- /dev/null
+++ b/tests/cut/unconditional-front.fastq
@@ -0,0 +1,12 @@
+ at prefix:1_13_573/1
+GAANTAGCTACCACCCTGATTAGACAAAT
++
+&&&!.1&(6:<'67..*,:75)'77&&&5
+ at prefix:1_13_1259/1
+CTANGACGGGTTGGCCCTTAGACGTATCT
++
+A;A!9<<<,7:<=3=;:<&<?<?8<;=<&
+ at prefix:1_13_1440/1
+TCTNCCCTGCCACATTGCCCTAGTTAAAC
++
+=57!7<';<6?5;;6:+:=)71>70<,=:
diff --git a/tests/cut/wildcard.fa b/tests/cut/wildcard.fa
new file mode 100644
index 0000000..2dae07a
--- /dev/null
+++ b/tests/cut/wildcard.fa
@@ -0,0 +1,4 @@
+>1
+TGCATGCA
+>2
+TGCATGCA
diff --git a/tests/cut/wildcardN.fa b/tests/cut/wildcardN.fa
new file mode 100644
index 0000000..ef44dbc
--- /dev/null
+++ b/tests/cut/wildcardN.fa
@@ -0,0 +1,6 @@
+>perfect
+TTT
+>withN
+TTT
+>1mism
+TTTGGGGCGG
diff --git a/tests/cut/wildcard_adapter.fa b/tests/cut/wildcard_adapter.fa
new file mode 100644
index 0000000..27d5dab
--- /dev/null
+++ b/tests/cut/wildcard_adapter.fa
@@ -0,0 +1,8 @@
+>1
+
+>2
+
+>3b
+TGGCTGGCC
+>4b
+TGGCTGGCC
diff --git a/tests/cut/wildcard_adapter_anywhere.fa b/tests/cut/wildcard_adapter_anywhere.fa
new file mode 100644
index 0000000..8ba6688
--- /dev/null
+++ b/tests/cut/wildcard_adapter_anywhere.fa
@@ -0,0 +1,8 @@
+>1
+TGCATGCA
+>2
+TGCATGCA
+>3b
+TGGCTGGCC
+>4b
+TGGCTGGCC
diff --git a/tests/data/454.fa b/tests/data/454.fa
new file mode 100644
index 0000000..92caddf
--- /dev/null
+++ b/tests/data/454.fa
@@ -0,0 +1,118 @@
+>000163_1255_2627 length=52 uaccno=E0R4ISW01DCIQD
+CCATCTCATCCCTGCGTGTCCCATCTGTTCCCTTCCTTGTCTCAGTGTGGTG
+>000652_1085_0667 length=122 uaccno=E0R4ISW01CXJXP
+ATTGAAGAGGTTGGTAAGTTTTAAGTTGGTAGGTGGTTGGGGAGTGGTTGGAGAGGAGTTGTTGGGAGTTTGTGTCCTGCTGAGACACGCAACGGGGATAGGCAAGGCACACAGGGGATAGG
+>000653_1285_1649 length=135 uaccno=E0R4ISW01DE4SJ
+AATTAGTCGAGCGTTGTGGTGGGTATTTGTAATTTTAGCTACTCTGAAGGCTGAGGCAGGAGAACTGCTTGAACCCGGGAGGCGGAGGTTGCTGAGACACGCAACAGGAGATAGGCAAGGCACACAGGGGATAGG
+>000902_0715_2005 length=92 uaccno=E0R4ISW01B03K3
+GGGTGTTGAATTTAATATGTAGTATATTGATTTGTGATGATTATTTTGCCTGAGACACGCAACAGGGGTAGGCAAGGCACACAGGGGATAGG
+>001146_1255_0340 length=92 uaccno=E0R4ISW01DCGYU
+GGGTGTTGAATTTAATATGTAGTATATTGATTTGTGATGATTATTTTGCCTGAGACACGCAACAGGGGTAGGCAAGGCACACAGGGGATAGG
+>001210_1147_1026 length=171 uaccno=E0R4ISW01C2Z5W
+TAGGGAGGTGGTGAGTGTTGTGTGTTTAGATTGTGTGTGGTGGTTGGGAGTGGGAGTTGTATTTTAGGGTGTGGGTTGGGAGAGTGAAAGTTGTGGGTGTTTTGGATGGTGGGTTAGGTGGTTGTGCCTGAGACACGCAACAGGGGAAAGGCAAGGCACACAGGGGATAGG
+>001278_1608_2022 length=109 uaccno=E0R4ISW01D7HW4
+CACACACACTCTTCCCCATACCTACTCACACACACACACACACACACAAACATACACAAATAATTCTGAGACACGCAACAGGAGATAGGCAAGGCACACAGGGGATAGG
+>001333_1518_1176 length=142 uaccno=E0R4ISW01DZKTM
+AATTGTCGTTTGATTGTTGGAAAGTAGAGGGTCGGGTTGGGGTAGATTCGAAAGGGGAATTTTGAGAAAAGAAATGGAGGGAGGTAGGAAAATTTTTTGCTGAGACACGCAACAGGGGTAGGCAAGGCACACAGGGGATAGG
+>001398_1584_1549 length=154 uaccno=E0R4ISW01D5DPB
+TAATGAAATGGAATGGAATGGAATGGAATGAAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATGAAATGGAATGGAGTATAAAGGAATGGAATTACTGAGACACGCAACAGGGGAAGGCAAGGCACACAGGGGATAGG
+>001455_1136_2179 length=92 uaccno=E0R4ISW01C12AD
+GGGTGTTGAATTTAATATGTAGTATATTGATTTGTGATGATTATTTTGCCTGAGACACGCAACAGGGGTAGGCAAGGCACACAGGGGATAGG
+>001481_1165_0549 length=92 uaccno=E0R4ISW01C4KON
+GGGTGTTGAATTTAATATGTAGTATATTGATTTGTGATGATTATTTTGCCTGAGACACGCAACAGGGGTAGGCAAGGCACACAGGGGATAGG
+>001744_1376_3512 length=144 uaccno=E0R4ISW01DM5T2
+TAAGTAGGGAAGGTTTGAGGTTGTTGGTGTTGGTAGTAGGGGTGTTTTAGTTAGGGGTTGTAGTTTGTTAAGGGAATTTTATTTGAGTTTAGAATTGAGGCTGAGACACGCAAAAGGGGATAGGCAAGGCACACAGGGGATAGG
+>001893_1084_1137 length=162 uaccno=E0R4ISW01CXG4Z
+TGTATATTTTGTTGGGTTTGTATATATTGTTAGGTGTGGTTGGTGAGTTGTATTGGTGGTGGTGTAAGGTGAGTGGAAATGGGAATGGATTGTAGATATGTTGGATTTGTGGTTTTTGGTTGAGACACGAACAGGGGATAGGCAAGGCACACAGGGGATAGG
+>001927_0254_0706 length=182 uaccno=E0R4ISW01AWLLG
+TGGAATCATCTAAGGGACACAAATAGAATCATCATTGAATGGAATCGAATGGAATCATCTAATGTACTCGAATGGAATTATTATTGAATAGAATAGAATGGAATTATCGAATGGAATCAAATGGAATGTAATGGAATGCTGAGACACGCAACAGGGGAAAGGCAAGGCACACAGGGGATAGG
+>002007_1338_1037 length=139 uaccno=E0R4ISW01DJRTR
+GGGTTGTGTATTTGGATAGTATGTGGAAAATGGTATTAAAAAGAATTTGTAGTTGGATTGTTGGTGGTTATTTAGTTTTTGGGTAATGGGTAGATTCCTGAGACACGCAAAGGGATAGGCAAGGCACACAGGGGATAGG
+>002186_1130_0654 length=92 uaccno=E0R4ISW01C1H5C
+GGGTGTTGAATTTAATATGTAGTATATTGATTTGTGATGATTATTTTGCCTGAGACACGCAACAGGGGTAGGCAAGGCACACAGGGGATAGG
+>002282_1237_2702 length=134 uaccno=E0R4ISW01DAXWG
+AATTAGCCGGGCGTGATGGCGGGCGTTTGTAGTTTTAGTTATTCGGGAGGTTGAGGTAGGAGAATGGCGTGAATTCGGGAAGCGGAGTTTGCTGAGACACGCAACAGGGGTAGGCAAGGCACACAGGGGATAGG
+>002382_1259_0997 length=107 uaccno=E0R4ISW01DCT37
+TAAGGGTTGAAGCGAGGTAGGTAGTTTGTTTGTGGTTTTGTTTCGTATTTTTGTTTCGTATCCCTGAGACACGCAACAGAGGATAGGCAAGGCACACAGGGGATAGG
+>002477_0657_0655 length=174 uaccno=E0R4ISW01BVY8H
+TTTTTGGAAAGTTGGGTGGGTATAGTTTTGAGTAGTTAGAGGTATTATAATAGTATTAGGAAGTTGAATGTGAGGGTATAAGAGTTAATTTGATTTTTCGTTGATATGTTTGTTGTTTGAAGTTAGAGTGCTGAGACACGCAACAGGAGATAGGCAAGGCACACAGGGGATAGG
+>003149_1553_2333 length=170 uaccno=E0R4ISW01D2OBZ
+TATTTAGTTTTAGTTTGTTTAGGTGGTTATAGAATACGGAGTTTATGAAGTTGATTAGGAATATTATTAGTTGAATTAAGAATTGGGAAGAGAGGGGAACGGGAAGGGACGTGAGTGATTATTATTGCTGAGACACGCAAAGGGGATAGGCAAGGCACACAGGGGATAGG
+>003194_1475_2845 length=101 uaccno=E0R4ISW01DVT7J
+TATTTTGGGTTAAGTCGGGTTTAGTTGTTAGGGCGAGAAGTTAGTTGTTGACCCCTGCTGAGACACGCAAAAGGGGATAGGCAAGGCACACAGGGGATAGG
+>003206_1315_0479 length=95 uaccno=E0R4ISW01DHQPD
+GGGTTGGATAATATGATGGTGTTGGGGAATATTTAGGTATGTGGTTTGTGGCTGAGACACGCAACAGAGGATAGGCAAGGCACACAGGGGATAGG
+>003271_0173_0314 length=125 uaccno=E0R4ISW01APHAK
+GTTTATTTGTTATTTATTTTTAGGTTTAGAAGAGTGTTTGGTATTTATTGAGGATTTAGTATTTGTTAGAAGGATTGGATTCTGAGACACGCAACAGGGGGTAGGCAAGGCACACAGGGGATAGG
+>003443_1737_2250 length=67 uaccno=E0R4ISW01EITSS
+TGTAGGTTGTGTTGTAGGTTGTCCTGAGACACGCAACAGGGGAAAGGCAAGGCACACAGGGGATAGG
+>002633_1776_1582 length=81 uaccno=E0R4ISW01EL8JK
+CAGGGTGGATTGGGGAACACACAGTGTGGCCGCGTGATTCTGAGACACGCAACAGGGAAGGCAAGGCACACAGGGGATAGG
+>002663_0725_3154 length=126 uaccno=E0R4ISW01B1Z2S
+GCGTTTTATATTATAATTTAATATTTTGGAGGTTGGGTGCGGTGGTTTACGTTTGTAGTTTAGTATTTGGGAGGTTAAGGTAGCTGAGACACGCAACGGGGATAGGCAAGGCACACAGGGGATAGG
+>002761_1056_4055 length=121 uaccno=E0R4ISW01CU2V9
+AATTTTATTCGATTTATGTGATGATTTATTTATTTTATTTGAAGATGATTTTATTCGAGATTATTCGATGATTCCATTCCTGAGACACGCAAGGGGATAGGCAAGGCACACAGGGGATAGG
+>002843_0289_2275 length=122 uaccno=E0R4ISW01AZPE9
+ATTGAAGAGGTTGGTAAGTTTTAAGTTGGTAGGTGGTTGGGGAGTGGTTGGAGAGGAGTTGTTGGGAGTTTGTGTCCTGCTGAGACACGCAACGGGGATAGGCAAGGCACACAGGGGATAGG
+>002934_1762_2177 length=92 uaccno=E0R4ISW01EK0Q7
+GGGTGTTGAATTTAATATGTAGTATATTGATTTGTGATGATTATTTTGCCTGAGACACGCAACAGGGGTAGGCAAGGCACACAGGGGATAGG
+>003515_1711_1058 length=122 uaccno=E0R4ISW01EGIPG
+AATTGAATGGAATTATTATTGAATGGATTCGAATGGAATTATTATTGAATGGAATCATCGAGTGGAATCGAATGGAATCTGAGACACGCAACAGGGGAAAGGCAAGGCACACAGGGGATAGG
+>003541_1276_1589 length=112 uaccno=E0R4ISW01DECAV
+TAGTTTAGGGTGGTAGTTTGGATAAGGTAGTTTTACGGTTTAGTAGTAGTAGGTTAAGTAGGAAAACTGCTGAGACACGCAAAGGGGATAGGCAAGGCACACAGGGGATAGG
+>003587_1522_1804 length=152 uaccno=E0R4ISW01DZXX6
+AATTTATGTAGTGGAAGTAGGATATAAAGAATAGGTTAATGGATTTTGAGATATTAAAAAGAGTAGGAAATTAGTTGAGAGGTTAAGTAGTAGTTTATTTTAGCCACCCTGAGACACGCAACAGGAGATAGGCAAGGCACACAGGGGATAGG
+>003592_0076_0430 length=134 uaccno=E0R4ISW01AGYTC
+AATTAGTTAGGCGTGGTGGCGGGTGTTTGTAGTTTTAGTTATTCGGGAGGTTGAGGTAGGAGAATGTTGTGAATTTAGGAGGTGGAGTTTGCTGAGACACGCAACAGGGGAAGGCAAGGCACACAGGGGATAGG
+>003957_0595_0965 length=173 uaccno=E0R4ISW01BQJIV
+TAATATTAGGTGTCAATTTGACTGGATCGAGGGATGTGTGTCGGTGAGAGTCTCACTAGAGGTTGATATTTGAGTCGTTAGACTGGGAGAGGAAGACCGAACTGTCAAGTGTATGGGCGCCATCCAATTCTGAGACACGCAACAGGGGAAAGGCAAGGCACACAGGGGATAGG
+>003986_1127_2937 length=103 uaccno=E0R4ISW01C1AFF
+TAATGGAATGGAATTTTCGGAATGGAATGGAATGGAATGGAATGGAATGGAATGGAATTACTGAGACACGCAACAGGGGAAGGCAAGGCACACAGGGGATAGG
+>004012_1559_1491 length=111 uaccno=E0R4ISW01D26M9
+TAGTGGATATAAATGGAATGGATTGGAATGGAATGGATACGAATGGAATGGATTGGAGTGGAATGGATTGACTGAGACACGCAACAGGGGGCAAGGCACACAGGGGATAGG
+>004030_1508_2061 length=166 uaccno=E0R4ISW01DYPWF
+TACGTATATACGCGTACGCGTATACGTATATACGCGTATACGTATACGCGTACGTATATATACGCGTATACGTTTACGTACGTACGCGTATATACGTACGTATACACACACGCATATGCATACTGAGACACGCAACAGGGGAAAGGCAAGGCACACAGGGGATAGG
+>004038_1061_2047 length=152 uaccno=E0R4ISW01CVG5D
+AATTGATTCGAATGGAATGGATTGGAATGGAACGGATTTGAATGGAATGGATTGGAATGGAATGGATTGAATGGAATGGATTGGAGAGGATTGGATTTGAATGGAATTCTGAGACACGCAACAGGGGAAAGGCAAGGCACACAGGGGATAGG
+>004105_1121_0391 length=135 uaccno=E0R4ISW01C0PH1
+AATTAGTTGGGCGTGGTGGCGAGTGTTTGTAATTTTAGTTATTTAGGAGGTTGAGGTAGGAGAATTATTTGAACCCGGTAGACGGAAGTTGCTGAGACACGCAACAGGGGAAAGGCAAGGCACACAGGGGATAGG
+>004129_1618_3423 length=122 uaccno=E0R4ISW01D8ELT
+AATTGAATGGTATTGAAAGGTATTAATTTAGTGGAATGGAATGGAATGTATTGGAATGGAAAATAATGGAATGGAGTGCTGAGACACGCAACAGGGGAAAGGCAAGGCACACAGGGGATAGG
+>004203_0451_0902 length=115 uaccno=E0R4ISW01BDWC4
+TAGTTGGTGTGTTGTAATCGAGACGTAGTTGGTTGGTACGGGTTAGGGTTTTGATTGGGTTGTTGTGTTTGCTGAGACACGCAACATGGGATAGGCAAGGCACACAGGGGATAGG
+>004626_1937_0919 length=223 uaccno=E0R4ISW01E0CVD
+TAGAGTAGATAGTAGGGTTAGAGAAGGTAGGGTACGTTTAGTTTGTTAGTAAGGTTTAAGTTTTGGGTGGGAAAGGTTAGTGGCGGGAAGGGACGAAGGTGGTAATCGAGAGTAGATTTAGAGAAGTTTTTGAAGTGGGCGTTGGGAGTTTTCGAAGTATTGAGAGAGAGGAGCTTGTGCTGAGACATGCAACAGAGGATAGGCAAGGCACACAGGGGATAGG
+>004913_0641_2071 length=135 uaccno=E0R4ISW01BULRD
+AATTAGTCGAGCGTTGTGGTGGGTATTTGTAATTTTAGCTACTCTGAAGGCTGAGGCAGGAGAACTGCTTGAACCCGGGAGGCGGAGGTTGCTGAGACACGCAACAGGAGATAGGCAAGGCACACAGGGGATAGG
+>005063_0599_1983 length=127 uaccno=E0R4ISW01BQWX9
+ATGTGGTGAAGATTGGTTTTAGGTGTTTTAATGTGGATTTTCAGGGGTTTTAAAAGGGTTGGGAGAGTGAAATATATATAAGGCTGAGACACGCAAAAGGGGATAGGCAAGGCACACAGGGGATAGG
+>005140_0759_3209 length=116 uaccno=E0R4ISW01B4ZKR
+TAGTATAGAGGGTTTGTGGTCGTGAGGGTGTTGATGGCGGGAGGGTTTTGATGGTAGGAGGGCCCGTGCTGTGCTGAGACACGCAACAGGGGAAGGCAAGGCACACAGGGGATAGG
+>005351_0883_3221 length=137 uaccno=E0R4ISW01CFVHJ
+TTAGGTGTTATAGTTGAGTGAGATGTTAGTGTTTAATGGTTTTATTTAGGTTGATGGGTTAATGAGGGGGTATTTGATAGTTTTGAAGATTTGACTGAGACACGCAACGGGGATAGGCAAGGCACACAGGGGATAGG
+>005380_1702_1187 length=207 uaccno=E0R4ISW01EFQC1
+TAGGGTTTTTCGAGTATATATTTAGTAGTACGCTCGACTTCTCTTATATAAAGGTTTTGGTTTTTATAGGTTTTTCCATTGTGTCTGCCTGGGGGAGGGCCCTTCTCCTTCAGGATACTGTAGCTTCTCTGCGTGATAAGCCAGCATTCACGGCTTTCAGGTGCTGAGACATGCAACAGGGGAAAGGCAAGGCACACAGGGGATAGG
+>005568_1060_1943 length=63 uaccno=E0R4ISW01CVDWP
+ATAGCGTATTTCTCACCTGCTGAGACACGCAACAGGGGAAAGGCAAGGCACACAGGGGATAGG
+>005740_1536_2697 length=159 uaccno=E0R4ISW01D06VV
+TAAAGAGGTGTTATTATTAGTTAGGAGAGGAGGTGGTTAGATAGTAGTGGGATTATAGGGGAATATAGAGTTGTTAGTTTAGGGATAAGGGATTGATCGATGGGTTAGGTCTCTGCTGAGACACGCAAAAGGGGATAGGCAAGGCACACAGGGGATAGG
+>005753_1884_3877 length=95 uaccno=E0R4ISW01EVRNB
+AAACTGAGTTGTGATGTTTGCATTCAACTCACAGAGTTCAACATTCCTTTAACTGAGACACGCAACAGGGTTAGGCAAGGCACACAGGGTATAGG
+>read_equals_adapter 1a
+TGAGACACGCAACAGGGGAAAGGCAAGGCACACAGGGGATAGG
+>read_equals_start_of_adapter 1b
+TGAGACACGCAACAGGGGAAAG
+>read_equals_end_of_adapter 1c
+GAAAGGCAAGGCACACAGGGGATAGG
+>read_equals_middle_of_adapter 1d
+GCAACAGGGGAAAGGCAAGGCACACAGG
+>read_ends_with_adapter 2a
+GCTACTCTGAAGGCTGAGGCAGGAGAACTGCTTGAACCCGGGAGGCGTGAGACACGCAACAGGGGAAAGGCAAGGCACACAGGGGATAGG
+>read_ends_with_start_of_adapter 2b
+GCTACTCTGAAGGCTGAGGCAGGAGAACTGCTTGAACCCGGGAGGCGTGAGACACGCAACAGGGGAAAGGCAAGG
+>read_contains_adapter_in_the_middle 3
+CGTAGTTGGTTGGTACGTGAGACACGCAACAGGGGAAAGGCAAGGCACACAGGGGATAGGGGTTAGGGTTTTGATTGGGTTGT
+>read_starts_with_adapter 4a
+TGAGACACGCAACAGGGGAAAGGCAAGGCACACAGGGGATAGGAAAGGTTTTGGTTTTTATAGGTTTTT
+>read_starts_with_end_of_adapter 4b
+AACAGGGGAAAGGCAAGGCACACAGGGGATAGGAAAGGTTTTGGTTTTTATAGGTTTTT
diff --git a/tests/data/E3M.fasta b/tests/data/E3M.fasta
new file mode 100644
index 0000000..daa7686
--- /dev/null
+++ b/tests/data/E3M.fasta
@@ -0,0 +1,59 @@
+>E3MFGYR02JWQ7T length=260 xy=3946_2103 region=2 run=R_2008_01_09_16_16_00_
+tcagGGTCTACATGTTGGTTAACCCGTACTGATTTGAATTGGCTCTTTGTCTTTCCAAAG
+GGAATTCATCTTCTTATGGCACACATAAAGGATAAATACAAGAATCTTCCTATTTACATC
+ACTGAAAATGGCATGGCTGAATCAAGGAATGACTCAATACCAGTCAATGAAGCCCGCAAG
+GATAGTATAAGGATTAGATACCATGATGGCCATCTTAAATTCCTTCTTCAAGCGATCAAG
+GAAGGTGTTAATTTGAAGGGGCTTa
+>E3MFGYR02JA6IL length=265 xy=3700_3115 region=2 run=R_2008_01_09_16_16_00_
+tcagTTTTTTTTGGAAAGGAAAACGGACGTACTCATAGATGGATCATACTGACGTTAGGA
+AAATAATTCATAAGACAATAAGGAAACAAAGTGTAAAAAAAAAACCTAAATGCTCAAGGA
+AAATACATAGCCATCTGAACAGATTTCTGCTGGAAGCCACATTTCTCGTAGAACGCCTTG
+TTCTCGACGCTGCAATCAAGAATCACCTTGTAGCATCCCATTGAACGCGCATGCTCCGTG
+AGGAACTTGATGATTCTCTTTCCCAAATGcc
+>E3MFGYR02JHD4H length=292 xy=3771_2095 region=2 run=R_2008_01_09_16_16_00_
+tcagAAAGACAAGTGGTATCAACGCAGAGTGGCCATTACGCCGGGGACTAGGTCATGTTA
+AGAGTGTAGCTTTGTGATGCTCTGCATCCGTCTTATGATAAAATTGAGGTTATCCTGAAA
+TAAAGTGTCTCAAACGATTTATTTTCCATTTATTGTATTTAATTTGAGTCCAAACTAGAT
+TAGAGATCTCTGTAATAAAACATGTTTGTTAGTTTAATTTCAATAACATTTAGTATTGTG
+TCGTAAAAAAAAAAAAAACGAAAAAAAAAAAAACAAAAAAAAAAACAAATGTACGGccgg
+ctagagaacg
+>E3MFGYR02GFKUC length=295 xy=2520_2738 region=2 run=R_2008_01_09_16_16_00_
+tcagCGGCCGGGCCTCTCATCGGTGGTGGAATCACTGGCCTTGTTTACGAGGTTGTCTTT
+ATCAGCCACACCCACGAGCAGCTTCCCACCACTGACTACTAGAGGGGGGGAAATGAAAAA
+TAAAAAAAAAAAATTGTGTATTATTGAATTTCTCTGGAATCTTCTTCTGTGTATGGTTTT
+CCTTCCTTGTGTTTTCTTCCTAATTCACTTTCGAGGGTTGTACTTGTTCCTTTCGTCTTA
+AATCCTTGGATGGTTGATGATCATGAAGTTCTCTTTAAAGTTAAATTATTATCATTTTG
+>E3MFGYR02FTGED length=277 xy=2268_2739 region=2 run=R_2008_01_09_16_16_00_
+tcagTGGTAATGGGGGGAAATTTAATTTTCTGATTTTATTATATATAGTTAATTGATGCT
+TTCGACGGTTTATATTTATGCGATTTGGTTTAGGTTTCAATGGAATTTTGTTGGTAGTTT
+ATATGATTGTATATAGTTATCAGCAACCTTATATTGTTTGCTTGCCTTTCTAGAGCACTC
+AGTGGAGATTTGAAACTTTGTTAGTGGAAAATTTGCAATTGTATGTTAATTGGAGATGGA
+GACAAAAAAGGAGGCAGATATTAATATTTATTTGGATATCA
+>E3MFGYR02FR9G7 length=256 xy=2255_0361 region=2 run=R_2008_01_09_16_16_00_
+tcagCTCCGTAAGAAGGTGCTGCCCGCCGTCATCGTCCGCCAGCGCAAGCCTTGGCGCCG
+AAAGGACGGTGTTTACATGTACTTCGAAGATAATGCTGGTGTTATCGTGAATCCCAAGGG
+TGAAATGAAAGGTTCTGCTATCACTGGTCCAATTGGGAAGGAGTGTGCTGATCTGTGGCC
+CAGGATTGCAAGTGCTGCCAATGCTATTGTTTAAGCTAGGATTTTAGTTTTTGTAATGTT
+TCAGCTTCTTGAAGTTGTTTc
+>E3MFGYR02GAZMS length=271 xy=2468_1618 region=2 run=R_2008_01_09_16_16_00_
+tcagAAAGAAGTAAGGTAAATAACAAACGACAGAGTGGCACATACTCCGGCAGTTCATGG
+GCAGTGACCCAGTTCAGAGAACCAAAGAACCTGAATAAGAATCTATGTCTACTGTGAATT
+TTGTGGCTTTCGTTGGAACGAAGGTAGCTTCGAAACAATAAAGTTATCTACTTCGCAATA
+TGAAGTGTTTCTGTTAGTTCTATGGTTCCTACTCCTAGCACCTCTTTTTCTTATAGAAAT
+GGACCACCGTGATTGGTACAAAAGNTGTACCTAGAtga
+>E3MFGYR02HHZ8O length=150 xy=2958_1574 region=2 run=R_2008_01_09_16_16_00_
+tcagACTTTCTTCTTTACCGTAACGTTGTTAAATTATCTGAGTATATGAAGGACCCTATT
+TGGGTTCTATAACTACAGAACATATCTCAGTCCAATAGTGACGGAATAACAATATTATAA
+ACTAGTTTAACGCTTTATGAAAAAAAAAAAAAAAgaaaaaaaaacatgtcggccgctgag
+acacgcaacaggggataggcaaggcacacaggggataggnn
+>E3MFGYR02GPGB1 length=221 xy=2633_0607 region=2 run=R_2008_01_09_16_16_00_
+tcagAAGCAGTGGTATCAACGCAGAGTGGCCATTACGGCCGGGTCTGATGAGTATGTGTC
+GAAGATCCCAAATAACAAGGTTGGTCTTGTAATTGGTAAAGGTGGAGAAACAATAAAGAA
+TATGCAAGCTTCAACTGGAGCAAGAATTCAGGTGATTCCTCTTCATCTTCCACCTGGTGA
+CACATCTACCAAAAAAAAAAAAAAAAAAAAACCAAATGTCGGCCGctgagacacgcaaca
+gggataggcaaggcacacaggggataggn
+>E3MFGYR02F7Z7G length=130 xy=2434_1658 region=2 run=R_2008_01_09_16_16_00_
+tcagAATCATCCACTTTTTAACGTTTTGTTTTGTTCATCTCTTAACAACAATTCTAGGGC
+GACAGAGAGAGTAAGTACCCACTAACCAGTCCCCAAGTACCAAAATAACAATTTAAACAA
+CAAAACACAAACAGatcttatcaacaaaactcaaagttcctaactgagacacgcaacagg
+ggataagacaaggcacacaggggataggnnnnnnnnnnn
diff --git a/tests/data/E3M.qual b/tests/data/E3M.qual
new file mode 100644
index 0000000..908e628
--- /dev/null
+++ b/tests/data/E3M.qual
@@ -0,0 +1,59 @@
+>E3MFGYR02JWQ7T length=260 xy=3946_2103 region=2 run=R_2008_01_09_16_16_00_
+23 24 26 38 31 11 27 28 25 28 22 25 27 28 36 27 32 22 33 23 27 16 40 33 18 28 28 24 25 20 26 26 37 31 10 21 27 16 36 28 32 22 27 26 28 37 30 9 28 27 26 36 29 8 33 23 37 30 9 37
+30 9 34 26 32 22 28 28 28 22 33 23 28 31 21 28 26 33 23 28 27 28 28 28 21 25 37 33 16 34 28 25 28 37 33 17 28 28 27 34 27 25 30 25 26 24 34 27 34 27 23 28 36 32 14 24 28 27 27 23
+26 25 27 25 36 32 18 1 27 29 21 26 24 27 31 22 27 26 26 34 26 28 27 33 26 34 26 33 26 28 26 27 27 27 27 28 19 25 25 31 23 28 28 28 27 33 26 26 26 27 18 21 35 31 12 21 28 34 28 32
+26 27 27 23 25 27 28 26 34 28 34 28 27 34 28 28 26 28 26 19 32 27 28 25 27 27 26 33 25 34 28 24 28 21 30 21 37 33 16 23 12 27 18 27 18 25 34 28 24 30 22 22 23 28 27 25 26 34 28 33
+26 19 6 34 28 25 25 32 27 34 28 37 33 17 25 34 28 36 32 18 2 17 24 14 17
+>E3MFGYR02JA6IL length=265 xy=3700_3115 region=2 run=R_2008_01_09_16_16_00_
+24 24 26 28 45 32 22 17 12 9 5 1 36 28 40 34 15 36 27 42 35 21 6 28 34 24 27 28 28 21 28 28 28 28 25 27 28 28 28 27 36 28 27 28 28 24 28 28 28 28 28 24 28 28 36 27 28 36 28 43
+36 22 10 28 19 5 36 28 28 25 28 37 28 28 12 28 33 26 28 24 11 35 26 41 34 15 27 40 33 18 28 28 24 24 44 26 17 13 10 7 6 4 2 1 22 9 27 36 33 17 27 26 26 27 28 30 22 33 26 36
+33 19 4 25 18 27 24 22 24 26 31 23 27 24 28 25 25 31 23 27 27 28 26 32 28 7 27 23 24 25 26 33 25 32 24 24 34 26 25 23 27 33 29 8 25 25 26 25 26 25 27 29 20 28 26 32 24 33 25 25
+29 20 24 26 28 23 25 26 26 27 25 27 27 27 18 27 28 31 23 27 31 23 27 23 27 33 27 34 27 27 26 28 26 27 28 27 37 33 15 24 33 26 27 27 18 26 25 27 27 27 25 28 26 27 25 34 28 27 24 27
+25 34 28 31 23 22 34 28 26 27 27 28 27 34 28 25 25 23 36 32 14 37 33 17 37 33 17 23 25 25 15
+>E3MFGYR02JHD4H length=292 xy=3771_2095 region=2 run=R_2008_01_09_16_16_00_
+19 23 27 28 41 34 16 27 27 27 27 16 28 22 33 23 23 28 27 27 36 28 28 28 28 22 26 26 28 26 34 24 36 27 26 37 28 28 27 28 36 28 43 36 22 9 24 21 26 28 36 27 27 28 28 28 27 37 28 36
+27 28 24 28 27 27 28 24 28 28 40 33 14 26 21 28 27 28 27 28 23 27 27 28 27 27 26 33 25 27 26 25 34 27 28 28 27 28 28 38 34 22 10 34 28 27 27 34 27 34 28 27 27 33 27 27 28 35 30 11
+28 37 33 17 27 28 26 27 27 23 25 36 32 14 27 27 24 32 28 7 28 36 32 19 3 30 21 22 37 33 15 21 34 27 28 22 26 36 33 17 34 28 37 33 17 26 21 26 24 34 27 35 31 12 20 27 27 28 25 34
+28 27 25 27 27 25 27 28 27 28 23 28 27 28 20 28 38 34 22 9 23 24 28 28 36 32 13 27 19 7 20 26 37 33 17 21 9 37 33 17 23 32 25 22 29 21 27 24 34 30 10 28 26 25 28 33 26 23 21 27
+28 27 26 23 32 20 11 7 5 3 2 1 1 1 1 1 1 1 20 25 33 21 13 8 6 4 3 2 2 1 1 1 1 23 34 25 16 11 9 7 5 4 3 1 1 21 37 33 17 21 27 25 28 28 34 27 32 27 21 9
+17 25 20 27 18 17 32 24 17 16
+>E3MFGYR02GFKUC length=295 xy=2520_2738 region=2 run=R_2008_01_09_16_16_00_
+24 23 24 27 28 36 28 37 28 39 32 13 34 25 22 28 27 28 26 28 28 37 28 28 36 28 26 36 28 36 28 27 28 27 28 26 36 28 36 28 35 26 28 41 34 17 28 28 28 27 36 28 37 28 28 27 28 41 34 16
+25 28 28 26 27 36 28 28 27 28 41 34 17 28 25 28 28 27 28 27 26 27 34 27 37 33 17 25 33 27 26 27 27 28 25 28 28 27 27 25 27 26 28 38 32 23 17 12 8 2 37 33 17 28 26 38 34 23 12 1
+28 34 23 15 10 8 6 4 3 2 1 1 1 31 23 28 26 26 28 26 34 27 24 34 27 28 34 27 37 33 16 27 24 25 28 34 27 34 27 28 28 34 26 26 34 28 27 27 28 27 28 27 28 28 34 28 38 34 23 11
+34 28 34 27 34 26 34 28 28 27 26 38 35 22 9 27 30 22 33 26 28 34 28 34 28 28 27 28 37 33 15 25 27 23 32 27 6 32 25 28 22 26 26 32 24 27 33 26 26 17 34 30 11 28 26 27 22 33 26 34
+30 10 26 30 22 34 28 33 25 26 27 34 28 31 26 24 28 28 28 28 26 28 28 27 28 32 24 26 34 26 27 28 26 34 30 10 32 28 7 27 33 25 35 31 12 34 27 25 30 22 23 28 27 23 38 34 23 11 26
+>E3MFGYR02FTGED length=277 xy=2268_2739 region=2 run=R_2008_01_09_16_16_00_
+21 24 28 24 28 35 27 28 35 28 28 44 35 24 16 9 2 41 34 17 40 34 15 34 26 43 36 22 9 28 25 26 26 41 34 20 5 26 37 28 27 27 28 28 28 28 28 28 37 28 36 28 37 28 28 28 27 26 26 38
+31 11 28 24 28 28 36 27 36 29 8 26 27 28 36 29 8 27 28 27 28 28 24 34 27 5 32 22 40 33 14 28 37 28 41 34 16 28 32 24 23 34 28 34 27 38 34 22 9 27 34 28 34 27 27 26 26 36 32 13
+28 27 26 28 28 25 34 26 27 28 28 27 28 23 27 28 34 26 27 25 27 26 28 23 32 24 34 28 33 26 28 26 27 27 18 25 36 32 13 27 27 32 24 27 32 25 35 31 12 27 28 26 27 21 27 27 27 26 28 28
+27 26 28 33 25 22 28 28 37 33 17 26 37 33 17 20 36 32 14 28 34 27 26 27 28 34 28 38 34 22 8 37 33 15 27 28 34 27 33 26 27 26 27 28 28 33 25 34 28 34 28 34 26 24 24 28 25 34 28 28
+27 25 23 38 33 24 17 11 5 34 28 25 31 26 22 27 27 27 26 22 34 26 34 27 26 24 34 30 11 19 37 33 15 34 28 27 25 28 25 27 27
+>E3MFGYR02FR9G7 length=256 xy=2255_0361 region=2 run=R_2008_01_09_16_16_00_
+21 22 26 28 28 24 35 26 27 28 36 28 28 37 28 36 27 28 28 26 25 24 37 30 9 28 36 28 28 21 28 26 28 28 28 28 36 28 28 35 26 27 25 25 28 28 36 28 23 31 20 32 22 29 18 27 27 34 25 28
+39 33 13 36 27 28 28 35 25 28 28 40 34 15 27 28 28 27 27 28 28 28 34 28 27 27 34 28 27 27 27 34 27 28 28 28 27 34 27 27 28 34 26 28 27 27 27 27 28 34 27 27 35 31 11 34 27 34 30 10
+28 27 34 30 10 27 28 37 33 15 33 25 33 26 26 28 26 27 27 27 28 26 26 28 27 34 27 26 31 23 34 28 34 28 37 33 15 34 28 34 28 27 23 27 28 27 27 28 23 28 27 25 27 24 27 22 34 28 37 33
+16 26 33 26 25 34 26 25 28 33 25 27 27 23 27 28 28 32 24 34 27 27 27 27 28 27 29 20 27 33 28 8 32 27 23 28 25 24 34 28 26 38 34 22 9 27 26 38 34 23 13 3 27 26 34 28 26 28 36 32
+14 23 28 27 20 33 25 28 30 22 26 33 25 23 34 28 23 34 30 10 27
+>E3MFGYR02GAZMS length=271 xy=2468_1618 region=2 run=R_2008_01_09_16_16_00_
+18 25 28 28 40 34 17 19 33 26 21 17 34 24 31 21 28 41 34 17 28 37 28 28 41 34 17 27 27 21 28 18 24 23 26 25 31 20 28 26 27 28 23 25 27 25 33 23 30 20 28 28 26 31 21 27 28 23 38 31
+11 28 28 28 28 28 26 39 33 13 28 28 35 25 28 26 28 27 28 35 26 36 27 35 31 11 28 32 24 34 28 26 25 34 28 28 34 28 24 33 25 27 27 28 26 27 27 26 27 27 27 27 27 26 27 28 34 27 38 34
+22 10 25 23 32 25 28 37 33 16 26 26 29 20 33 26 27 18 27 25 23 13 32 24 27 22 24 27 34 28 27 27 36 32 14 27 27 18 26 33 29 8 28 34 27 23 26 28 27 28 27 32 24 28 27 23 34 26 25 27
+27 24 34 28 26 25 27 36 32 17 25 25 27 33 27 27 27 34 28 28 28 27 25 34 28 33 27 34 28 28 27 23 25 34 28 27 27 27 28 27 34 27 20 23 38 34 24 15 7 26 22 11 28 27 23 26 36 32 14 22
+34 28 28 33 27 27 30 22 25 22 24 27 34 28 34 28 26 26 27 37 33 20 6 28 0 25 28 27 24 34 28 25 28 28 27 25 26 26
+>E3MFGYR02HHZ8O length=150 xy=2958_1574 region=2 run=R_2008_01_09_16_16_00_
+22 22 25 23 25 28 41 34 17 28 37 28 28 35 28 6 24 30 19 28 25 32 22 27 25 37 28 28 27 15 38 31 11 36 28 27 24 28 28 27 20 28 23 26 25 22 19 28 35 26 34 25 26 41 34 17 26 28 36 29
+7 36 29 8 35 26 28 28 28 24 33 23 28 24 27 27 23 25 34 24 26 24 28 27 22 28 26 28 24 27 28 34 27 34 27 26 27 28 26 27 28 28 34 28 31 23 25 30 22 27 29 21 26 27 34 27 28 26 37 33
+17 17 26 18 28 34 30 11 19 6 27 24 27 35 30 11 27 22 28 32 19 11 6 4 3 2 1 1 1 1 1 1 1 1 27 36 28 19 14 11 8 6 4 2 19 19 27 27 28 27 33 26 33 26 25 27 25 28 26 22
+28 25 27 27 28 25 34 28 28 24 38 34 21 7 28 25 17 33 26 26 31 26 34 27 27 27 27 26 26 28 38 34 23 12 27 28 25 33 27 0 0
+>E3MFGYR02GPGB1 length=221 xy=2633_0607 region=2 run=R_2008_01_09_16_16_00_
+21 24 27 28 36 28 28 28 26 28 28 36 28 28 27 24 28 36 27 28 28 28 23 27 27 28 28 37 28 36 27 27 37 28 28 28 37 28 36 27 41 34 17 28 28 28 28 27 28 28 28 26 28 28 28 28 28 28 28 28
+28 37 28 28 27 28 39 32 13 41 34 16 28 37 28 28 34 28 34 28 34 28 34 27 27 26 34 28 27 27 34 28 34 28 34 28 27 37 33 15 34 27 28 34 28 28 28 37 33 16 28 34 26 27 37 33 16 27 34 27
+26 27 27 27 28 34 28 26 23 34 27 25 34 27 28 26 34 28 27 25 28 34 27 27 33 26 34 28 27 28 34 27 27 27 27 34 28 34 27 25 26 34 27 26 24 27 28 34 27 32 24 27 31 23 28 34 27 27 25 28
+27 25 27 27 27 28 27 17 32 24 35 16 8 4 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 21 9 36 31 13 24 27 26 28 34 28 34 27 19 22 23 19 28 28 26 26 20 23 22 26 34 27 25 25
+36 32 17 27 27 24 24 14 21 34 27 31 23 23 28 22 27 27 28 36 32 18 2 27 27 22 25 15 0
+>E3MFGYR02F7Z7G length=130 xy=2434_1658 region=2 run=R_2008_01_09_16_16_00_
+22 21 23 28 26 15 12 21 28 21 36 28 27 27 43 35 23 12 1 36 28 27 27 41 34 20 5 28 43 36 22 9 27 35 26 28 26 27 26 28 22 33 26 37 28 26 36 27 28 35 27 31 20 26 28 13 38 32 12 26
+23 24 27 28 27 22 25 28 19 27 28 20 36 27 25 20 26 41 34 17 28 28 17 36 28 35 27 20 28 28 43 36 22 8 33 26 25 27 27 31 26 38 34 22 10 25 34 28 26 34 27 32 27 5 37 33 17 20 23 13
+27 37 33 19 4 27 28 20 37 33 17 24 26 23 27 21 26 33 26 26 27 28 34 27 21 38 34 21 7 28 25 24 37 33 17 28 34 28 32 24 27 33 27 27 20 28 27 27 22 28 19 25 22 28 32 26 27 23 37 33
+20 5 24 24 34 28 28 11 26 30 25 33 26 28 25 22 26 27 27 38 34 23 11 28 26 28 34 26 0 0 0 0 0 0 0 0 0 0 0
diff --git a/tests/data/adapter.fasta b/tests/data/adapter.fasta
new file mode 100644
index 0000000..3519ebc
--- /dev/null
+++ b/tests/data/adapter.fasta
@@ -0,0 +1,4 @@
+>adapter1
+GCCGAACTTCTTAGACTGCCTTAAGGACGT
+>adapter2
+CAGGTATATCGA
diff --git a/tests/data/anchored-back.fasta b/tests/data/anchored-back.fasta
new file mode 100644
index 0000000..651f3fb
--- /dev/null
+++ b/tests/data/anchored-back.fasta
@@ -0,0 +1,8 @@
+>read1
+sequenceBACKADAPTER
+>read2
+sequenceBACKADAPTERblabla
+>read3
+sequenceBACKADA
+>read4
+sequenceBECKADAPTER
diff --git a/tests/data/anchored.fasta b/tests/data/anchored.fasta
new file mode 100644
index 0000000..2af20a4
--- /dev/null
+++ b/tests/data/anchored.fasta
@@ -0,0 +1,8 @@
+>read1
+FRONTADAPTsequence
+>read2
+blablaFRONTADAPTsequence
+>read3
+NTADAPTsequence
+>read4
+FRINTADAPTsequence
diff --git a/tests/data/anchored_no_indels.fasta b/tests/data/anchored_no_indels.fasta
new file mode 100644
index 0000000..dcf626a
--- /dev/null
+++ b/tests/data/anchored_no_indels.fasta
@@ -0,0 +1,12 @@
+>no_mismatch (adapter: TTAGACATAT)
+TTAGACATATGAGGTCAG
+>one_mismatch
+TAAGACATATGAGGTCAG
+>two_mismatches
+TAAGACGTATGAGGTCAG
+>insertion
+ATTAGACATATGAGGTCAG
+>deletion
+TAGACATATGAGGTCAG
+>mismatch_plus_wildcard
+TNAGACGTATGAGGTCAG
diff --git a/tests/data/anywhere_repeat.fastq b/tests/data/anywhere_repeat.fastq
new file mode 100644
index 0000000..120d100
--- /dev/null
+++ b/tests/data/anywhere_repeat.fastq
@@ -0,0 +1,28 @@
+ at prefix:1_13_1400/1
+CGTCCGAANTAGCTACCACCCTGATTAGACAAAT
++
+)3%)&&&&!.1&(6:<'67..*,:75)'77&&&5
+ at prefix:1_13_1500/1
+CAAGACAAGACCTGCCACATTGCCCTAGTATTAA
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
+ at prefix:1_13_1550/1
+CAAGACAAGACCTGCCACATTGCCCTAGTCAAGA
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
+ at prefix:1_13_1600/1
+CAAGATGTCCCCTGCCACATTGCCCTAGTCAAGA
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
+ at prefix:1_13_1700/1
+CAAGATGTCCCCTGCCACATTGCCCTAGTTTATT
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
+ at prefix:1_13_1800/1
+GTTCATGTCCCCTGCCACATTGCCCTAGTTTATT
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
+ at prefix:1_13_1900/1
+ATGGCTGTCCCCTGCCACATTGCCCTAGTCAAGA
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
\ No newline at end of file
diff --git a/tests/data/dos.fastq b/tests/data/dos.fastq
new file mode 100644
index 0000000..6b1ecec
--- /dev/null
+++ b/tests/data/dos.fastq
@@ -0,0 +1,12 @@
+ at prefix:1_13_573/1
+CGTCCGAANTAGCTACCACCCTGATTAGACAAAT
++
+)3%)&&&&!.1&(6:<'67..*,:75)'77&&&5
+ at prefix:1_13_1259/1
+AGCCGCTANGACGGGTTGGCCCTTAGACGTATCT
++
+;<:&:A;A!9<<<,7:<=3=;:<&<?<?8<;=<&
+ at prefix:1_13_1440/1
+CAAGATCTNCCCTGCCACATTGCCCTAGTTAAAC
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
diff --git a/tests/data/empty.fastq b/tests/data/empty.fastq
new file mode 100644
index 0000000..e69de29
diff --git a/tests/data/example.fa b/tests/data/example.fa
new file mode 100644
index 0000000..b1fc713
--- /dev/null
+++ b/tests/data/example.fa
@@ -0,0 +1,18 @@
+>read1
+MYSEQUENCEADAPTER
+>read2
+MYSEQUENCEADAP
+>read3
+MYSEQUENCEADAPTERSOMETHINGELSE
+>read4
+MYSEQUENCEADABTER
+>read5
+MYSEQUENCEADAPTR
+>read6
+MYSEQUENCEADAPPTER
+>read7
+ADAPTERMYSEQUENCE
+>read8
+PTERMYSEQUENCE
+>read9
+SOMETHINGADAPTERMYSEQUENCE
diff --git a/tests/data/illumina.fastq.gz b/tests/data/illumina.fastq.gz
new file mode 100644
index 0000000..23f9a93
Binary files /dev/null and b/tests/data/illumina.fastq.gz differ
diff --git a/tests/data/illumina5.fastq b/tests/data/illumina5.fastq
new file mode 100644
index 0000000..c915c8d
--- /dev/null
+++ b/tests/data/illumina5.fastq
@@ -0,0 +1,20 @@
+ at SEQ:1:1101:9010:3891#0/1 adapter start: 51
+ATAACCGGAGTAGTTGAAATGGTAATAAGACGACCAATCTGACCAGCAAGGGCCTAACTTCTTAGACTGCCTTAAGGACGTAAGCCAAGATGGGAAAGGTC
++
+FFFFFEDBE at 79@@>@CBCBFDBDFDDDDD<@C>ADD at B;5:978 at CBDDFFDB4B?DB21;84?DDBC9DEBAB;=@<@@B@@@@B>CCBBDE98>>0 at 7
+ at SEQ:1:1101:9240:3898#0/1
+CCAGCAAGGAAGCCAAGATGGGAAAGGTCATGCGGCATACGCTCGGCGCCAGTTTGAATATTAGACATAATTTATCCTCAAGTAAGGGGCCGAAGCCCCTG
++
+GHGHGHHHHGGGDHHGDCGFEEFHHGDFGEHHGFHHHHHGHEAFDHHGFHHEEFHGHFHHFHGEHFBHHFHHHH at GGGDGDFEEFC@=D?GBGFGF:FB6D
+ at SEQ:1:1101:9207:3899#0/1 adapter start: 64
+TTAACTTCTCAGTAACAGATACAAACTCATCACGAACGTCAGAAGCAGCCTTATGGCCGTCAACGCCTAACTTCTTAGACTGCCTTAAGGACGTATACATA
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHFHHHHHHCFHHFHHFHFFFFFBHHGHHHFFHHFHGGHHDEBFG<FGGDG
+ at SEQ:1:1101:9148:3908#0/1 adapter start: 28
+ACGACGCAATGGAGAAAGACGGAGAGCGGCCTAACTTCTTAGACTGCCTTAAGGACGTCCAACGGCGTCCATCTCGAAGGAGTCGCCAGCGATAACCGGAG
++
+HHHHHHHHHHHHGHHHHGHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHDHDHHFHHHHHFFFFFHHHEFBEGEGGFFFHHHFHHHHHHFHHEHHGHEHD
+ at SEQ:1:1101:9044:3916#0/1 adapter start: 78
+AACAGAAGGAGTCTACTGCTCGCGTTGCGTCTATTATGGAAAACACCAATCTTTCCAAGCAACAGCAGGTTTCCGAGAGCCTAACTTCTTAGACTGCCTTA
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHHGHHHHGHHHHHHHHHHHHFHEBFHFFEFHEFHHGHFHHHHGGHGHHFHGGGHG
diff --git a/tests/data/illumina64.fastq b/tests/data/illumina64.fastq
new file mode 100644
index 0000000..bc5b102
--- /dev/null
+++ b/tests/data/illumina64.fastq
@@ -0,0 +1,80 @@
+ at 14569
+AAGTTTATTCCTGGACGAAGGAAGAAAAGGCCAGATGGGAAACAAGAACAAGCCCCTGTTGAAGACGCAGGGCCAACAGGGGCCAACGAAGCTGC
++
+cceeeeceeeee`dedbdbdb_^b`abU_cacadabd`dLMZ[XTcT^a^adaaaddcd`aL^`^_`Y\]^`Y_BBBBBBBBBBBBBBBBBBBBB
+ at 19211
+AGAGGGCGTGTGATTGCTGGATGTGGGCGGGGGGCCGGGGGAGCCCCATGGGCAGGAGACCTGAGAGCCAGGCGGTGAGGCACTATGAACGCGAG
++
+^\`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+ at 9180
+GAGGGGCAGCGACTAGTCACCGGACCTGTCAGGCAAGCATAAGCCGTGCGTCAGCACCACGCTGACGGTGCTCCCGCACTCGCGGGACGCGCCAC
++
+b`bLbBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+ at 19132
+TGTGATTATCCACTGGTATATCGGCGTGCCGTCCGCACGAGGAAAAAAGGCATTATTGTTGTGGATCTGTACCATCGTTTGTCCCGTTACCCTTC
++
+Z[QZZLZ[]J[SHZNaZ[_IaBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+ at 15868
+CTGCCAAGGCTGCCCCCAAACCTGGCCCTCCGCGCACCCCACCACGGATCCTGACGTCCTGTCCCCCGCGGCTATGACAGCCAAGTCCCGTCAGC
++
+`c`cc\`\Lb]bL`[`a]L`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+ at 1424
+GGCCCCAGACTTGCTCCCCCAACAAGGACAATGTCCAAGGAGTGTCCCCTGGGAAGGGTGGGCCTCCCCAGGTGCGGGCGGTGGGCACTGCCCCC
++
+eeeeeeeea`bbdaaadad`Oaaaaccada_aa_d`_X`_^`[`_[_W^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+ at 7855
+GTGGGGGCTACAATGTGGCTCCAAGTTTTTTCCCGGGAGGTAAGGCCGGGAGCCCCCGCCCTGAGGGGGCGGGAAAGAGGAAGCCCGACGCGGAC
++
+]^\]FW]Z`BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+ at 17943
+ACATGGGACCAGAAAACACCACCAGGGGTTTGGGGCTGTCCTGAGGCTCGGGTAGCAAGCAGCGGGGCTCCGTGTCCAAGCACGCCGGTGTCACC
++
+ccc`\^`aba\b^`\FR`OOPYG[[W```[Ra_RR_\]\\P\_H_BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+ at 11100
+CGGATAACTGAAAATGCATTTTTAACGCCATGACCGTGTCTCAAGGACCCGCTGTGGAAGGGGCGCCGCAGCCAGAAGCTGGCCATGTCAGCGCG
++
+b`b_b_a\bc^Tabadaddcddd``bdaa_^aJ\^_\]\\__O[___L^\_aaa^^^UJ^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+ at 15663
+AGGTGAAGTGGCAGGAGGACCGCCGGAAGAAGCTCTTCAGAACTCAGGGGGAGGGGGAAAGCAGAAACCAGAAGTCCAGTGAGCAGGGGGCTGAG
++
+aaKaBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+ at 4698
+CCAATTGGCACCCCTCTGCCTTCAGCCATTCCCTCTGGCTACTGCTCTCTGGTCGGGGCGCCTGGGCGACAGACTCTCTCCCCCCACCCCCCCGC
++
+cccc\`ccc\caccZccccc]^`LY\bL_bBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+ at 20649
+TCTGGACTGGATCTTTAGGATGGTGGAGATGATCTGGATGTAGGACAAAAGAACCAGGCAGAAGGGTGTCATCAGAAGAACACTGCTAGACACCA
++
+eeeeeaddadacdddebeccdddadd\^abbT_]bccTac]]b]L^][]Ve[^ZaY_^_^`\\Y]^Y`BBBBBBBBBBBBBBBBBBBBBBBBBBB
+ at 17259
+GCCTTGTGTTGTTCCTGGCATCACCGCAGGGAGCCCTGGGGGGCCAGGCGGGCGCTGACCCTGGGCACTGCCGCGCCTGGAGGGGCTGAGCACCG
++
+BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+ at 6003
+CTTCAACTCATCTTGTTATTAATACCATCAATATCCCATGAGGCTCATAAAACGAGTCTTTCTTCTTGGAAACATGACCAAGATTGGGCAAACGT
++
+fffffffffffffffffdffecfcefeffdcfdeeebbbdbccccc\db\`^aa`^Y^^^cbcbaa`bbWY^^^__S_YYR]GWY]\]]XX\_`S
+ at 4118
+TCAAATTGTACTGCAAAGAAGGTCCCAGCTGGTCTCTTCTGGGAGTGATCTAACTAACTTAAGCTGACCCTGTGACTGGCTGAGGATAATCCCTT
++
+dc^ddeeeeeedeee`ceceddadadddcbde_dedc_ec_a^^b\b\\]VIPZY^T^^^\L_BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+ at 18416
+GTGGGGAAGCCGAAGAAGCAGCGGAGATCGATTGTAAGAACGACGTCCATGACCAGGGTTGGTGGAGACTGCTTCTCTGCATGCGGGGGAAGGCG
++
+dddacaabdbea\d^cce\da`dd_^__`a`a`b[_^__^\^^^_BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+ at 20115
+TGAAAAAGGAAAACATGGTAGTTTTCTTGTATGAGAGAGCCAGAGCCACCTTGGAGATTTTGTTCTCTCTGTGCGCACCAGTGATGACACAGGGG
++
+ed^eeafffaddfecdddabc^_badd`bd_ddadaa^bbcad\d\__^_\aaa_aY____aaN_\cdc\^aaYbBBBBBBBBBBBBBBBBBBBB
+ at 16139
+TCATCCGAAGAGTTGGCAGGCCCTGTGAATTGTGAAAACAGTATACCCACCCCTTTCCCGGAGCAGGACGCTGAATGTCCAGAGGATGCCAGACC
++
+cabacacY^c\daaddaadad^\ad_a\Y`[ZQ]Y^^OYQ^X^YT\\]U\^RRX^\YJ^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+ at 14123
+GATTTGGGGAAAGGAAACAATAGTTGAGTTTGGGCCACGGGAAATTCAAGATGCCTGGTATGTCAAGTCTGGCAGTTGAAGCAGCAGGGCTGGCG
++
+cccccccac^bYbbT_aa_Yb^^Ta\\^]]aaTaaaaab\b\XL`VZZV]QYYY[aa^^^^_^^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB
+ at 8766
+ACCTGTAAGGTCCGCTCCTGGTGGACACCCACGAAGTCCAGGGCCTCAGGCAGGAAGTTGTAGCGCAGAGTTTTGAGCAGCTGCTCCATCAGGGA
++
+fcfffffcffeffeeefdefddeecdccacddfdYd`d^\_^`\_abbc\b[ba^Y^Z_^^H^Z_^Y_Y_OKWPZR]]Z]`Z``Z^UHZ^BBBBB
diff --git a/tests/data/interleaved.fastq b/tests/data/interleaved.fastq
new file mode 100644
index 0000000..1da3fdb
--- /dev/null
+++ b/tests/data/interleaved.fastq
@@ -0,0 +1,32 @@
+ at read1/1 some text
+TTATTTGTCTCCAGCTTAGACATATCGCCT
++
+##HHHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read1/2 other text
+GCTGGAGACAAATAACAGTGGAGTAGTTTT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read2/1
+CAACAGGCCACATTAGACATATCGGATGGT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read2/2
+TGTGGCCTGTTGCAGTGGAGTAACTCCAGC
++
+###HHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read3/1
+CCAACTTGATATTAATAACATTAGACA
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read3/2
+TGTTATTAATATCAAGTTGGCAGTG
++
+#HHHHHHHHHHHHHHHHHHHHHHHH
+ at read4/1
+GACAGGCCGTTTGAATGTTGACGGGATGTT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read4/2
+CATCCCGTCAACATTCAAACGGCCTGTCCA
++
+HH############################
diff --git a/tests/data/issue46.fasta b/tests/data/issue46.fasta
new file mode 100644
index 0000000..50c9ce5
--- /dev/null
+++ b/tests/data/issue46.fasta
@@ -0,0 +1,2 @@
+>readname
+CGTGA
diff --git a/tests/data/lengths.fa b/tests/data/lengths.fa
new file mode 100644
index 0000000..c03f249
--- /dev/null
+++ b/tests/data/lengths.fa
@@ -0,0 +1,28 @@
+>read_length0a
+T330201030313112312
+>read_length0b
+T1330201030313112312
+>read_length1
+T21330201030313112312
+>read_length2
+T021330201030313112312
+>read_length3
+T3021330201030313112312
+>read_length4
+T33021330201030313112312
+>read_length5
+T233021330201030313112312
+>read_length6
+T0233021330201030313112312
+>read_length7
+T10233021330201030313112312
+>read_length8
+T110233021330201030313112312
+>read_length9
+T1110233021330201030313112312
+>read_length10
+T21110233021330201030313112312
+>read_length11
+T021110233021330201030313112312
+>read_length12
+T0021110233021330201030313112312
diff --git a/tests/data/linked.fasta b/tests/data/linked.fasta
new file mode 100644
index 0000000..5d21f89
--- /dev/null
+++ b/tests/data/linked.fasta
@@ -0,0 +1,10 @@
+>r1 5' adapter and 3' adapter
+AAAAAAAAAACCCCCCCCCCTTTTTTTTTTGGGGGGG
+>r5 only 5' adapter
+AAAAAAAAAACCCCCCCCCCGGGGGGG
+>r3 5' adapter, partial 3' adapter
+AAAAAAAAAACCCGGCCCCCTTTTT
+>r4 only 3' adapter
+GGGGGGGGGGCCCCCCCCCCTTTTTTTTTTGGGGGGG
+>r2 without any adapter
+GGGGGGGGGGGGGGGGGGG
diff --git a/tests/data/lowqual.fastq b/tests/data/lowqual.fastq
new file mode 100644
index 0000000..7d7d92b
--- /dev/null
+++ b/tests/data/lowqual.fastq
@@ -0,0 +1,8 @@
+@first_sequence
+SEQUENCE1
++
+#########
+@second_sequence
+SEQUENCE2
++
+#########
diff --git a/tests/data/maxn.fasta b/tests/data/maxn.fasta
new file mode 100644
index 0000000..1110d12
--- /dev/null
+++ b/tests/data/maxn.fasta
@@ -0,0 +1,12 @@
+>r1
+
+>r2
+N
+>r3
+AAAA
+>r4
+AAAAN
+>r5
+AAANN
+>r6
+AANNN
diff --git a/tests/data/multiblock.fastq.gz b/tests/data/multiblock.fastq.gz
new file mode 100644
index 0000000..8c38897
Binary files /dev/null and b/tests/data/multiblock.fastq.gz differ
diff --git a/tests/data/nextseq.fastq b/tests/data/nextseq.fastq
new file mode 100644
index 0000000..0b6acc1
--- /dev/null
+++ b/tests/data/nextseq.fastq
@@ -0,0 +1,8 @@
+ at NS500350:251:HLM7JBGXX:1:11101:12075:1120 1:N:0:TACAGC
+GATCGGAAGAGCACACGTCTGAACTCCAGTCACTACAGCATCTCGTATTCCGTCTTCTGCTTGAAAAAAAAAAAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
++
+AAAAAEEEEEEAEEEEAEAEEEEEEAEEEEEEEEEEEEEEE///E/EE////AAEE/E//////EEEEEEE6///////E6EEA/AEAEAE6EEEEEEEEEEEEAEAA/E/EEEEA//EEEEEAEAEE/EEEAEEEE<E/AEEEEE/EEE
+ at NS500350:251:HLM7JBGXX:1:11101:22452:1121 1:N:0:TACAGC
+GATCGGAAGAGCACACGTCTGAACTCCAGTCACTACAGCATCGCGTATGCCGTCTTATGCTTGAAAAAAAAAAAGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
++
+AAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE/////E/EE//E6///E//A//E//EEEEEEEE6//EEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE<EEEEEEEE
diff --git a/tests/data/no_indels.fasta b/tests/data/no_indels.fasta
new file mode 100644
index 0000000..7a6afb7
--- /dev/null
+++ b/tests/data/no_indels.fasta
@@ -0,0 +1,20 @@
+# 3' adapter: TTAGACATAT
+# 5' adapter: GAGATTGCCA
+>3p_orig
+TGAACATAGCTTAGACATATAACCG
+>3p_mism
+TGAACATAGCTTACACATATAACCG
+>3p_del
+TGAACATAGCTTAACATATAACCG
+>3p_ins
+TGAACATAGCTTAGGACATATAACCG
+>3p_frontins
+TAGACATATAACCG
+>5p_orig
+TCCTCGAGATTGCCATACTGCTTCTCGAA
+>5p_mism
+TCCTCGAGATAGCCATACTGCTTCTCGAA
+>5p_del
+TCCTCGAGATGCCATACTGCTTCTCGAA
+>5p_ins
+TCCTCGAGATATGCCATACTGCTTCTCGAA
diff --git a/tests/data/overlapa.fa b/tests/data/overlapa.fa
new file mode 100644
index 0000000..3a4fac7
--- /dev/null
+++ b/tests/data/overlapa.fa
@@ -0,0 +1,40 @@
+>read1
+T0021110233021330201030313112312
+>read2
+T002111023302133020103031311231
+>read3
+T00211102330213302010303131123
+>read4
+T0021110233021330201030313112
+>read5
+T002111023302133020103031311
+>read6
+T00211102330213302010303131
+>read7
+T0021110233021330201030313
+>read8
+T002111023302133020103031
+>read9
+T00211102330213302010303
+>read10
+T0021110233021330201030
+>read11
+T002111023302133020103
+>read12
+T00211102330213302010
+>read13
+T0021110233021330201
+>read14
+T002111023302133020
+>read15
+T00211102330213302
+>read16
+T0021110233021330
+>read17
+T002111023302133
+>read18
+T00211102330213
+>read19
+T0021110233021
+>read20
+T002111023302
diff --git a/tests/data/overlapb.fa b/tests/data/overlapb.fa
new file mode 100644
index 0000000..c268fc3
--- /dev/null
+++ b/tests/data/overlapb.fa
@@ -0,0 +1,38 @@
+>adaptlen18
+TTAGACATATCTCCGTCGATACTTACCCGTA
+>adaptlen17
+TAGACATATCTCCGTCGATACTTACCCGTA
+>adaptlen16
+AGACATATCTCCGTCGATACTTACCCGTA
+>adaptlen15
+GACATATCTCCGTCGATACTTACCCGTA
+>adaptlen14
+ACATATCTCCGTCGATACTTACCCGTA
+>adaptlen13
+CATATCTCCGTCGATACTTACCCGTA
+>adaptlen12
+ATATCTCCGTCGATACTTACCCGTA
+>adaptlen11
+TATCTCCGTCGATACTTACCCGTA
+>adaptlen10
+ATCTCCGTCGATACTTACCCGTA
+>adaptlen9
+TCTCCGTCGATACTTACCCGTA
+>adaptlen8
+CTCCGTCGATACTTACCCGTA
+>adaptlen7
+TCCGTCGATACTTACCCGTA
+>adaptlen6
+CCGTCGATACTTACCCGTA
+>adaptlen5
+CGTCGATACTTACCCGTA
+>adaptlen4
+GTCGATACTTACCCGTA
+>adaptlen3
+TCGATACTTACCCGTA
+>adaptlen2
+CGATACTTACCCGTA
+>adaptlen1
+GATACTTACCCGTA
+>adaptlen0
+ATACTTACCCGTA
diff --git a/tests/data/paired.1.fastq b/tests/data/paired.1.fastq
new file mode 100644
index 0000000..3f2d733
--- /dev/null
+++ b/tests/data/paired.1.fastq
@@ -0,0 +1,16 @@
+ at read1/1 some text
+TTATTTGTCTCCAGCTTAGACATATCGCCT
++
+##HHHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read2/1
+CAACAGGCCACATTAGACATATCGGATGGT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read3/1
+CCAACTTGATATTAATAACATTAGACA
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read4/1
+GACAGGCCGTTTGAATGTTGACGGGATGTT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
diff --git a/tests/data/paired.2.fastq b/tests/data/paired.2.fastq
new file mode 100644
index 0000000..808df31
--- /dev/null
+++ b/tests/data/paired.2.fastq
@@ -0,0 +1,16 @@
+ at read1/2 other text
+GCTGGAGACAAATAACAGTGGAGTAGTTTT
++
+HHHHHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read2/2
+TGTGGCCTGTTGCAGTGGAGTAACTCCAGC
++
+###HHHHHHHHHHHHHHHHHHHHHHHHHHH
+ at read3/2
+TGTTATTAATATCAAGTTGGCAGTG
++
+#HHHHHHHHHHHHHHHHHHHHHHHH
+ at read4/2
+CATCCCGTCAACATTCAAACGGCCTGTCCA
++
+HH############################
diff --git a/tests/data/plus.fastq b/tests/data/plus.fastq
new file mode 100644
index 0000000..35849f8
--- /dev/null
+++ b/tests/data/plus.fastq
@@ -0,0 +1,8 @@
+@first_sequence some other text
+SEQUENCE1
++first_sequence some other text
+:6;;8<=:<
+@second_sequence and more text
+SEQUENCE2
++second_sequence and more text
+83<??:(61
diff --git a/tests/data/polya.fasta b/tests/data/polya.fasta
new file mode 100644
index 0000000..4f02229
--- /dev/null
+++ b/tests/data/polya.fasta
@@ -0,0 +1,6 @@
+>polyA
+AAACTTCAGAACAGAAAAAAAAAAAAAAAAAAAAA
+>polyAlong
+CTTAGTTCAATWTTAACCAAACTTCAGAACAGAAAAAAAAAAAAAAAAAAAAAGAAAAAAAAAAAAAAAAAAAA
+>polyA2
+AAACTTAACAAGAACAAGAAAAAAAAAAAAAAAAAAAAA
diff --git a/tests/data/prefix-adapter.fasta b/tests/data/prefix-adapter.fasta
new file mode 100644
index 0000000..b56e57b
--- /dev/null
+++ b/tests/data/prefix-adapter.fasta
@@ -0,0 +1,2 @@
+>prefixadapter
+^FRONTADAPT
diff --git a/tests/data/rest.fa b/tests/data/rest.fa
new file mode 100644
index 0000000..31277ed
--- /dev/null
+++ b/tests/data/rest.fa
@@ -0,0 +1,18 @@
+>read1
+TESTINGADAPTERREST1
+>read2
+TESTINGADAPTERRESTING
+>read3
+TESTINGADAPTER
+>read4
+TESTINGADAPTERRESTLESS
+>read5
+TESTINGADAPTERRESTORE
+>read6
+ADAPTERSOMETHING
+>read7
+DAPTERSOMETHING
+>read8
+RESTADAPTERSOMETHING
+>read9
+NOREST
diff --git a/tests/data/rest.txt b/tests/data/rest.txt
new file mode 100644
index 0000000..31b1941
--- /dev/null
+++ b/tests/data/rest.txt
@@ -0,0 +1,5 @@
+REST1 read1
+RESTING read2
+RESTLESS read4
+RESTORE read5
+SOMETHING read8
diff --git a/tests/data/restfront.txt b/tests/data/restfront.txt
new file mode 100644
index 0000000..3cdba2f
--- /dev/null
+++ b/tests/data/restfront.txt
@@ -0,0 +1,6 @@
+TESTING read1
+TESTING read2
+TESTING read3
+TESTING read4
+TESTING read5
+REST read8
diff --git a/tests/data/s_1_sequence.txt.gz b/tests/data/s_1_sequence.txt.gz
new file mode 100644
index 0000000..3967383
Binary files /dev/null and b/tests/data/s_1_sequence.txt.gz differ
diff --git a/tests/data/simple.fasta b/tests/data/simple.fasta
new file mode 100644
index 0000000..e5c1d4c
--- /dev/null
+++ b/tests/data/simple.fasta
@@ -0,0 +1,7 @@
+# a comment
+# another one
+>first_sequence
+SEQUENCE1
+>second_sequence
+SEQUEN
+CE2
diff --git a/tests/data/simple.fastq b/tests/data/simple.fastq
new file mode 100644
index 0000000..f728223
--- /dev/null
+++ b/tests/data/simple.fastq
@@ -0,0 +1,8 @@
+@first_sequence
+SEQUENCE1
++
+:6;;8<=:<
+@second_sequence
+SEQUENCE2
++
+83<??:(61
diff --git a/tests/data/small.fastq b/tests/data/small.fastq
new file mode 100644
index 0000000..767ca22
--- /dev/null
+++ b/tests/data/small.fastq
@@ -0,0 +1,12 @@
+ at prefix:1_13_573/1
+CGTCCGAANTAGCTACCACCCTGATTAGACAAAT
++
+)3%)&&&&!.1&(6:<'67..*,:75)'77&&&5
+ at prefix:1_13_1259/1
+AGCCGCTANGACGGGTTGGCCCTTAGACGTATCT
++
+;<:&:A;A!9<<<,7:<=3=;:<&<?<?8<;=<&
+ at prefix:1_13_1440/1
+CAAGATCTNCCCTGCCACATTGCCCTAGTTAAAC
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
diff --git a/tests/data/small.fastq.bz2 b/tests/data/small.fastq.bz2
new file mode 100644
index 0000000..d71a53a
Binary files /dev/null and b/tests/data/small.fastq.bz2 differ
diff --git a/tests/data/small.fastq.gz b/tests/data/small.fastq.gz
new file mode 100644
index 0000000..f843389
Binary files /dev/null and b/tests/data/small.fastq.gz differ
diff --git a/tests/data/small.fastq.xz b/tests/data/small.fastq.xz
new file mode 100644
index 0000000..a7f38cb
Binary files /dev/null and b/tests/data/small.fastq.xz differ
diff --git a/tests/data/small.myownextension b/tests/data/small.myownextension
new file mode 100644
index 0000000..767ca22
--- /dev/null
+++ b/tests/data/small.myownextension
@@ -0,0 +1,12 @@
+ at prefix:1_13_573/1
+CGTCCGAANTAGCTACCACCCTGATTAGACAAAT
++
+)3%)&&&&!.1&(6:<'67..*,:75)'77&&&5
+ at prefix:1_13_1259/1
+AGCCGCTANGACGGGTTGGCCCTTAGACGTATCT
++
+;<:&:A;A!9<<<,7:<=3=;:<&<?<?8<;=<&
+ at prefix:1_13_1440/1
+CAAGATCTNCCCTGCCACATTGCCCTAGTTAAAC
++
+<=A:A=57!7<';<6?5;;6:+:=)71>70<,=:
diff --git a/tests/data/solid.csfasta b/tests/data/solid.csfasta
new file mode 100644
index 0000000..1045429
--- /dev/null
+++ b/tests/data/solid.csfasta
@@ -0,0 +1,63 @@
+# Tue May  5 13:57:32 2009 /share/apps/corona/bin/filter_fasta.pl --output=/data/results/s0103/s0103_20090430_552to561_2_2/552to561/results.01/primary.20090505091459275 --name=s0103_20090430_552to561_2_2_552to561 --tag=F3 --minlength=35 --mincalls=25 --prefix=T /data/results/s0103/s0103_20090430_552to561_2_2/552to561/jobs/postPrimerSetPrimary.197/rawseq
+# Cwd: /state/partition1/home/pipeline
+# Title: s0103_20090430_552to561_2_2_552to561
+>1_13_85_F3
+T110020300.0113010210002110102330021
+>1_13_573_F3
+T312311200.3021301101113203302010003
+>1_13_1259_F3
+T002112130.2012223322111330201230313
+>1_13_1440_F3
+T110020313.1113211010332111302330001
+>1_14_177_F3
+T31330222020233321121323302013303311
+>1_14_238_F3
+T01331031200310022122230330201030313
+>1_15_1098_F3
+T32333033222233020223032312232220332
+>1_16_404_F3
+T03310320002130202331112133020103031
+>1_16_904_F3
+T21230102331022312232132021122111212
+>1_16_1315_F3
+T03231231112210333010310323302010003
+>1_16_1595_F3
+T22323211312111230022210011213302012
+>1_17_1379_F3
+T32011212111223230232132311321200123
+>1_18_1692_F3
+T12322233031100211233323300112200210
+>1_19_171_F3
+T10101101220213201111011320201230032
+>1_22_72_F3
+T13303032323221212301322233320210233
+>1_22_1377_F3
+T22221333311222312201132312022322300
+>1_23_585_F3
+T30010310310130312122123302013303131
+>1_23_809_F3
+T13130101101021211013220302223302112
+>1_24_138_F3
+T33211130100120323002033020123031311
+>1_24_206_F3
+T33330332002223002020303331321221000
+>1_25_143_F3
+T23202003031200220301303302012203132
+>1_25_1866_F3
+T03201321022131101112012330221130311
+>1_27_584_F3
+T10010330110103213112323303012103101
+>1_27_1227_F3
+T02003022123001003201002031303302011
+>1_27_1350_F3
+T13130101101021211013220222221301231
+>1_29_477_F3
+T13130101101021211013300302223003030
+>1_30_882_F3
+T20102033000233133320103031311233200
+>1_31_221_F3
+T03301311201100030300100233220102031
+>1_31_1313_F3
+T01331131300330122321000101010330201
+>1_529_129_F3
+T132222301020322102101322221322302.3302.3.3..221..3
diff --git a/tests/data/solid.fasta b/tests/data/solid.fasta
new file mode 100644
index 0000000..f9f1777
--- /dev/null
+++ b/tests/data/solid.fasta
@@ -0,0 +1,4 @@
+>problem1
+T01120212022222011231210231030201330
+>problem2
+T20201030313112322220210033020133031
diff --git a/tests/data/solid.fastq b/tests/data/solid.fastq
new file mode 100644
index 0000000..82c7b7a
--- /dev/null
+++ b/tests/data/solid.fastq
@@ -0,0 +1,120 @@
+ at 1_13_85_F3
+T110020300.0113010210002110102330021
++
+7&9<&77)& <7))%4'657-1+9;9,.<8);.;8
+ at 1_13_573_F3
+T312311200.3021301101113203302010003
++
+6)3%)&&&& .1&(6:<'67..*,:75)'77&&&5
+ at 1_13_1259_F3
+T002112130.2012223322111330201230313
++
+=;<:&:A;A 9<<<,7:<=3=;:<&<?<?8<;=<&
+ at 1_13_1440_F3
+T110020313.1113211010332111302330001
++
+=<=A:A=57 7<';<6?5;;6:+:=)71>70<,=:
+ at 1_14_177_F3
+T31330222020233321121323302013303311
++
+:8957;;54)'98924905;;)6:7;1:3<88(9:
+ at 1_14_238_F3
+T01331031200310022122230330201030313
++
+?><5=;<<<12>=<;1;;=5);.;14:0>2;:3;7
+ at 1_15_1098_F3
+T32333033222233020223032312232220332
++
+#,##(#5##*#($$'#.##)$&#%)$1##-$&##%
+ at 1_16_404_F3
+T03310320002130202331112133020103031
++
+78;:;;><>9=9;<<2=><<1;58;9<<;>(<;<;
+ at 1_16_904_F3
+T21230102331022312232132021122111212
++
+9>=::6;;99=+/'$+#.#&%$&'(($1*$($.#.
+ at 1_16_1315_F3
+T03231231112210333010310323302010003
++
+<9<8A?>?::;6&,%;6/)8<<#/;79(448&*.)
+ at 1_16_1595_F3
+T22323211312111230022210011213302012
++
+>,<=<>@6<;?<=>:/=.>&;;8;)17:=&,>1=+
+ at 1_17_1379_F3
+T32011212111223230232132311321200123
++
+/-1179<1;>>8:':7-%/::0&+=<29,7<8(,2
+ at 1_18_1692_F3
+T12322233031100211233323300112200210
++
+.#(###5%)%2)',2&:+#+&5,($/1#&4&))$6
+ at 1_19_171_F3
+T10101101220213201111011320201230032
++
+)6:65/=3*:(8%)%2>&8&%;%0&#;$3$&:$#&
+ at 1_22_72_F3
+T13303032323221212301322233320210233
++
+3/#678<:.=9::6:(<538295;9+;&*;)+',&
+ at 1_22_1377_F3
+T22221333311222312201132312022322300
++
+)##0%.$.1*%,)95+%%14%$#8-###9-()#9+
+ at 1_23_585_F3
+T30010310310130312122123302013303131
++
+>55;8><96/18?)<3<58<5:;96=7:1=8=:-<
+ at 1_23_809_F3
+T13130101101021211013220302223302112
++
+:7<59@;<<5;/9;=<;7::.)&&&827(+221%(
+ at 1_24_138_F3
+T33211130100120323002033020123031311
++
+6)68/;906#,25/&;<$0+250#2,<)5,9/+7)
+ at 1_24_206_F3
+T33330332002223002020303331321221000
++
+))4(&)9592)#)694(,)292:(=7$.18,()65
+ at 1_25_143_F3
+T23202003031200220301303302012203132
++
+:4;/#&<9;&*;95-7;85&;587#16>%&,9<2&
+ at 1_25_1866_F3
+T03201321022131101112012330221130311
++
+=<>9;<@7?(=6,<&?=6=(=<641:?'<1=;':4
+ at 1_27_584_F3
+T10010330110103213112323303012103101
++
+82'('*.-8+%#2)(-&3.,.2,),+.':&,'(&/
+ at 1_27_1227_F3
+T02003022123001003201002031303302011
++
+492:;>A:<;34<<=);:<<;9=7<3::<::3=>'
+ at 1_27_1350_F3
+T13130101101021211013220222221301231
++
+95,)<(4./;<938=64=+2/,.4),3':97#33&
+ at 1_29_477_F3
+T13130101101021211013300302223003030
++
+94=55:75=+:/7><968;;#&+$#3&6,#1#4#'
+ at 1_30_882_F3
+T20102033000233133320103031311233200
++
+2(+-:-3<;5##/;:(%&84'#:,?3&&8>-();5
+ at 1_31_221_F3
+T03301311201100030300100233220102031
++
+89>9>5<139/,&:7969972.274&%:78&&746
+ at 1_31_1313_F3
+T01331131300330122321000101010330201
++
+;3<7=7::)5*4=&;<7>4;795065;9';896'=
+ at 1_529_129_F3
+T132222301020322102101322221322302.3302.3.3..221..3
++
+>>%/((B6-&5A0:6)>;'1)B*38/?(5=%B+ &<-9 % @  )%)  (
diff --git a/tests/data/solid.qual b/tests/data/solid.qual
new file mode 100644
index 0000000..f7c5c43
--- /dev/null
+++ b/tests/data/solid.qual
@@ -0,0 +1,63 @@
+# Tue May  5 13:57:32 2009 /share/apps/corona/bin/filter_fasta.pl --output=/data/results/s0103/s0103_20090430_552to561_2_2/552to561/results.01/primary.20090505091459275 --name=s0103_20090430_552to561_2_2_552to561 --tag=F3 --minlength=35 --mincalls=25 --prefix=T /data/results/s0103/s0103_20090430_552to561_2_2/552to561/jobs/postPrimerSetPrimary.197/rawseq
+# Cwd: /state/partition1/home/pipeline
+# Title: s0103_20090430_552to561_2_2_552to561
+>1_13_85_F3
+22 5 24 27 5 22 22 8 5 -1 27 22 8 8 4 19 6 21 20 22 12 16 10 24 26 24 11 13 27 23 8 26 13 26 23
+>1_13_573_F3
+21 8 18 4 8 5 5 5 5 -1 13 16 5 7 21 25 27 6 21 22 13 13 9 11 25 22 20 8 6 22 22 5 5 5 20
+>1_13_1259_F3
+28 26 27 25 5 25 32 26 32 -1 24 27 27 27 11 22 25 27 28 18 28 26 25 27 5 27 30 27 30 23 27 26 28 27 5
+>1_13_1440_F3
+28 27 28 32 25 32 28 20 22 -1 22 27 6 26 27 21 30 20 26 26 21 25 10 25 28 8 22 16 29 22 15 27 11 28 25
+>1_14_177_F3
+25 23 24 20 22 26 26 20 19 8 6 24 23 24 17 19 24 15 20 26 26 8 21 25 22 26 16 25 18 27 23 23 7 24 25
+>1_14_238_F3
+30 29 27 20 28 26 27 27 27 16 17 29 28 27 26 16 26 26 28 20 8 26 13 26 16 19 25 15 29 17 26 25 18 26 22
+>1_15_1098_F3
+2 11 2 2 7 2 20 2 2 9 2 7 3 3 6 2 13 2 2 8 3 5 2 4 8 3 16 2 2 12 3 5 2 2 4
+>1_16_404_F3
+22 23 26 25 26 26 29 27 29 24 28 24 26 27 27 17 28 29 27 27 16 26 20 23 26 24 27 27 26 29 7 27 26 27 26
+>1_16_904_F3
+24 29 28 25 25 21 26 26 24 24 28 10 14 6 3 10 2 13 2 5 4 3 5 6 7 7 3 16 9 3 7 3 13 2 13
+>1_16_1315_F3
+27 24 27 23 32 30 29 30 25 25 26 21 5 11 4 26 21 14 8 23 27 27 2 14 26 22 24 7 19 19 23 5 9 13 8
+>1_16_1595_F3
+29 11 27 28 27 29 31 21 27 26 30 27 28 29 25 14 28 13 29 5 26 26 23 26 8 16 22 25 28 5 11 29 16 28 10
+>1_17_1379_F3
+14 12 16 16 22 24 27 16 26 29 29 23 25 6 25 22 12 4 14 25 25 15 5 10 28 27 17 24 11 22 27 23 7 11 17
+>1_18_1692_F3
+13 2 7 2 2 2 20 4 8 4 17 8 6 11 17 5 25 10 2 10 5 20 11 7 3 14 16 2 5 19 5 8 8 3 21
+>1_19_171_F3
+8 21 25 21 20 14 28 18 9 25 7 23 4 8 4 17 29 5 23 5 4 26 4 15 5 2 26 3 18 3 5 25 3 2 5
+>1_22_72_F3
+18 14 2 21 22 23 27 25 13 28 24 25 25 21 25 7 27 20 18 23 17 24 20 26 24 10 26 5 9 26 8 10 6 11 5
+>1_22_1377_F3
+8 2 2 15 4 13 3 13 16 9 4 11 8 24 20 10 4 4 16 19 4 3 2 23 12 2 2 2 24 12 7 8 2 24 10
+>1_23_585_F3
+29 20 20 26 23 29 27 24 21 14 16 23 30 8 27 18 27 20 23 27 20 25 26 24 21 28 22 25 16 28 23 28 25 12 27
+>1_23_809_F3
+25 22 27 20 24 31 26 27 27 20 26 14 24 26 28 27 26 22 25 25 13 8 5 5 5 23 17 22 7 10 17 17 16 4 7
+>1_24_138_F3
+21 8 21 23 14 26 24 15 21 2 11 17 20 14 5 26 27 3 15 10 17 20 15 2 17 11 27 8 20 11 24 14 10 22 8
+>1_24_206_F3
+8 8 19 7 5 8 24 20 24 17 8 2 8 21 24 19 7 11 8 17 24 17 25 7 28 22 3 13 16 23 11 7 8 21 20
+>1_25_143_F3
+25 19 26 14 2 5 27 24 26 5 9 26 24 20 12 22 26 23 20 5 26 20 23 22 2 16 21 29 4 5 11 24 27 17 5
+>1_25_1866_F3
+28 27 29 24 26 27 31 22 30 7 28 21 11 27 5 30 28 21 28 7 28 27 21 19 16 25 30 6 27 16 28 26 6 25 19
+>1_27_584_F3
+23 17 6 7 6 9 13 12 23 10 4 2 17 8 7 12 5 18 13 11 13 17 11 8 11 10 13 6 25 5 11 6 7 5 14
+>1_27_1227_F3
+19 24 17 25 26 29 32 25 27 26 18 19 27 27 28 8 26 25 27 27 26 24 28 22 27 18 25 25 27 25 25 18 28 29 6
+>1_27_1350_F3
+24 20 11 8 27 7 19 13 14 26 27 24 18 23 28 21 19 28 10 17 14 11 13 19 8 11 18 6 25 24 22 2 18 18 5
+>1_29_477_F3
+24 19 28 20 20 25 22 20 28 10 25 14 22 29 27 24 21 23 26 26 2 5 10 3 2 18 5 21 11 2 16 2 19 2 6
+>1_30_882_F3
+17 7 10 12 25 12 18 27 26 20 2 2 14 26 25 7 4 5 23 19 6 2 25 11 30 18 5 5 23 29 12 7 8 26 20
+>1_31_221_F3
+23 24 29 24 29 20 27 16 18 24 14 11 5 25 22 24 21 24 24 22 17 13 17 22 19 5 4 25 22 23 5 5 22 19 21
+>1_31_1313_F3
+26 18 27 22 28 22 25 25 8 20 9 19 28 5 26 27 22 29 19 26 22 24 20 15 21 20 26 24 6 26 23 24 21 6 28
+>1_529_129_F3
+29 29 4 14 7 7 33 21 12 5 20 32 15 25 21 8 29 26 6 16 8 33 9 18 23 14 30 7 20 28 4 33 10 -1 5 27 12 24 -1 4 -1 31 -1 -1 8 4 8 -1 -1 7 
diff --git a/tests/data/solid5p.fasta b/tests/data/solid5p.fasta
new file mode 100644
index 0000000..6b6d2cb
--- /dev/null
+++ b/tests/data/solid5p.fasta
@@ -0,0 +1,34 @@
+#  used adapter: CCGGAGGTCAGCTCGCTATA
+# in colorspace: C0302201212322332333
+>read1
+T1212322332333012001112122203233202221000211
+>read2
+T201212322332333200121311212133113001311002032
+>read3
+T02201212322332333211133003002232323010012320300
+>read4
+T0302201212322332333002010102312033021011121312131
+>read5
+T20302201212322332333221313210102120020302022233110
+>read6
+T20302211212322332333031203203013323021010020301321
+>read7
+T21301020302201212322332333203020130202120211322010013211
+>read8
+T2310321030130120302201212322332333232202123123111113113003200330
+>read9
+T0002132103320302201212322332333020123133023120320131020333011
+>read10
+T00322031320033220302201212322332333201130233321321011303133231200
+>read11
+T0302201212322332333.02010102312033021011121312131
+>read12
+T030220121232233233321
+>read13
+T03022012123223323332
+>read14
+T0302201212322332333
+>read15
+T030220121232233233
+>read16
+T030220121232233233
diff --git a/tests/data/solid5p.fastq b/tests/data/solid5p.fastq
new file mode 100644
index 0000000..1efba0a
--- /dev/null
+++ b/tests/data/solid5p.fastq
@@ -0,0 +1,64 @@
+ at read1
+T1212322332333012001112122203233202221000211
++
+:58)2";%4A,8>0;9C\'?276>#)49"<,>?/\'!A4$.%+
+ at read2
+T201212322332333200121311212133113001311002032
++
+44<@;(<3.37/''=:-9AA<&C2%$$;?A&5!C69:?-;&;65.
+ at read3
+T02201212322332333211133003002232323010012320300
++
+2!<A-BB&A/)'103&2$!00>#97*B.0A-@(*","B3><4&16(:
+ at read4
+T0302201212322332333002010102312033021011121312131
++
+74-:$-;&@>@0581-82'<&-81+%)7;<)6?83!&CB9"9B6307=&
+ at read5
+T20302201212322332333221313210102120020302022233110
++
+';4!-6?0$45.C#B+$(4+$9)27,(-*=,#4:;"/4++5<,@-784*'
+ at read6
+T20302211212322332333031203203013323021010020301321
++
++3"85:2=3<")$66*#4".4!.;:C%97@>75-";';*)A67CCC")$*
+ at read7
+T21301020302201212322332333203020130202120211322010013211
++
+,;0B@A"98!<=!*;5;650;';79!+8,4(2=+98:B@C@:+3*>2+6+2++C0.
+ at read8
+T2310321030130120302201212322332333232202123123111113113003200330
++
+C/$-"=6+1.8?AB!?'#.585@6:47@?>.315A-'9<%">6,+)*,)1-;:(691>?C)4A;
+ at read9
+T0002132103320302201212322332333020123133023120320131020333011
++
+(&?527&:=;6@6@03%95(-0#$:B8::B*4?@&)6>79C>)6C'5-#<!B:>0:A8+2*
+ at read10
+T00322031320033220302201212322332333201130233321321011303133231200
++
+&53)>2.+9?7%=&21;8!820961%3#0'5C.28347,2(55*1.,>%:(1A'A5=@7&&5?4'
+ at read11
+T0302201212322332333.02010102312033021011121312131
++
+6=@!85+6<A(&#@7"'C:&8B"195'@,@&:5=7;!&-9:%<!)>((>
+ at read12
+T030220121232233233321
++
+4&1.?+<-0(!(;://+0@?C
+ at read13
+T03022012123223323332
++
+!&,>"772,,/2/2A1C%5C
+ at read14
+T0302201212322332333
++
+@%$#B$A0B0&((<C*+.A
+ at read15
+T030220121232233233
++
+?B=,A#5"*?7268++:2
+ at read16
+T030220121232233233
++
+C=C=C:=+@77@723!C5
diff --git a/tests/data/sra.fastq b/tests/data/sra.fastq
new file mode 100644
index 0000000..a92a89c
--- /dev/null
+++ b/tests/data/sra.fastq
@@ -0,0 +1,24 @@
+ at 1_13_85_F3
+T110020300.0113010210002110102330021
++
+!7&9<&77)& <7))%4'657-1+9;9,.<8);.;8
+ at 1_13_573_F3
+T312311200.3021301101113203302010003
++
+!6)3%)&&&& .1&(6:<'67..*,:75)'77&&&5
+ at 1_13_1259_F3
+T002112130.2012223322111330201230313
++
+!=;<:&:A;A 9<<<,7:<=3=;:<&<?<?8<;=<&
+ at 1_13_1440_F3
+T110020313.1113211010332111302330001
++
+!=<=A:A=57 7<';<6?5;;6:+:=)71>70<,=:
+ at 1_14_177_F3
+T31330222020233321121323302013303311
++
+!:8957;;54)'98924905;;)6:7;1:3<88(9:
+ at 1_14_238_F3
+T01331031200310022122230330201030313
++
+!?><5=;<<<12>=<;1;;=5);.;14:0>2;:3;7
diff --git a/tests/data/suffix-adapter.fasta b/tests/data/suffix-adapter.fasta
new file mode 100644
index 0000000..65c68c3
--- /dev/null
+++ b/tests/data/suffix-adapter.fasta
@@ -0,0 +1,2 @@
+>suffixadapter
+BACKADAPTER$
diff --git a/tests/data/toolong.fa b/tests/data/toolong.fa
new file mode 100644
index 0000000..79a9a79
--- /dev/null
+++ b/tests/data/toolong.fa
@@ -0,0 +1,14 @@
+>read_length6
+T023302
+>read_length7
+T1023302
+>read_length8
+T11023302
+>read_length9
+T111023302
+>read_length10
+T2111023302
+>read_length11
+T02111023302
+>read_length12
+T002111023302
diff --git a/tests/data/tooshort.fa b/tests/data/tooshort.fa
new file mode 100644
index 0000000..a5e4711
--- /dev/null
+++ b/tests/data/tooshort.fa
@@ -0,0 +1,12 @@
+>read_length0a
+T
+>read_length0b
+T
+>read_length1
+T2
+>read_length2
+T02
+>read_length3
+T302
+>read_length4
+T3302
diff --git a/tests/data/tooshort.noprimer.fa b/tests/data/tooshort.noprimer.fa
new file mode 100644
index 0000000..e5e22b4
--- /dev/null
+++ b/tests/data/tooshort.noprimer.fa
@@ -0,0 +1,14 @@
+>read_length0a
+
+>read_length0b
+
+>read_length1
+
+>read_length2
+2
+>read_length3
+02
+>read_length4
+302
+>read_length5
+3302
diff --git a/tests/data/trimN3.fasta b/tests/data/trimN3.fasta
new file mode 100644
index 0000000..d936bad
--- /dev/null
+++ b/tests/data/trimN3.fasta
@@ -0,0 +1,2 @@
+>read1
+CAGTCGGTCCTGAGAGATGGGCGAGCGCTGGNANNNNNNNG
diff --git a/tests/data/trimN5.fasta b/tests/data/trimN5.fasta
new file mode 100644
index 0000000..ce681fe
--- /dev/null
+++ b/tests/data/trimN5.fasta
@@ -0,0 +1,2 @@
+>read1
+NGGCCTGGAATTCTCGGGTGCCAAGGAACTCCAGTCACCAG
diff --git a/tests/data/twoadapters.fasta b/tests/data/twoadapters.fasta
new file mode 100644
index 0000000..68d59a1
--- /dev/null
+++ b/tests/data/twoadapters.fasta
@@ -0,0 +1,6 @@
+>read1
+GATCCTCCTGGAGCTGGCTGATACCAGTATACCAGTGCTGATTGTTGAATTTCAGGAATTTCTCAAGCTCGGTAGC
+>read2
+CTCGAGAATTCTGGATCCTCTCTTCTGCTACCTTTGGGATTTGCTTGCTCTTGGTTCTCTAGTTCTTGTAGTGGTG
+>read3 (no adapter)
+AATGAAGGTTGTAACCATAACAGGAAGTCATGCGCATTTAGTCGAGCACGTAAGTTCATACGGAAATGGGTAAG
diff --git a/tests/data/wildcard.fa b/tests/data/wildcard.fa
new file mode 100644
index 0000000..f482927
--- /dev/null
+++ b/tests/data/wildcard.fa
@@ -0,0 +1,4 @@
+>1
+ANGTACGTTGCATGCA
+>2
+ACGTANGTTGCATGCA
diff --git a/tests/data/wildcardN.fa b/tests/data/wildcardN.fa
new file mode 100644
index 0000000..5c15266
--- /dev/null
+++ b/tests/data/wildcardN.fa
@@ -0,0 +1,6 @@
+>perfect
+TTTGGGGGGG
+>withN
+TTTGGNGGGG
+>1mism
+TTTGGGGCGG
diff --git a/tests/data/wildcard_adapter.fa b/tests/data/wildcard_adapter.fa
new file mode 100644
index 0000000..a62b84c
--- /dev/null
+++ b/tests/data/wildcard_adapter.fa
@@ -0,0 +1,8 @@
+>1
+ACGTAAAACGTTGCATGCA
+>2
+ACGTGGGACGTTGCATGCA
+>3b
+TGGCTGGCCACGTCCCACGTAA
+>4b
+TGGCTGGCCACGTTTTACGTCC
diff --git a/tests/data/withplus.fastq b/tests/data/withplus.fastq
new file mode 100644
index 0000000..b71fc07
--- /dev/null
+++ b/tests/data/withplus.fastq
@@ -0,0 +1,8 @@
+@first_sequence
+SEQUENCE1
++this is different
+:6;;8<=:<
+@second_sequence
+SEQUENCE2
++also different
+83<??:(61
diff --git a/tests/testadapters.py b/tests/testadapters.py
new file mode 100644
index 0000000..4d3147b
--- /dev/null
+++ b/tests/testadapters.py
@@ -0,0 +1,125 @@
+# coding: utf-8
+from __future__ import print_function, division, absolute_import
+from nose.tools import raises, assert_raises
+
+from cutadapt.seqio import Sequence
+from cutadapt.adapters import (Adapter, Match, ColorspaceAdapter, FRONT, BACK,
+	parse_braces, LinkedAdapter)
+
+def test_issue_52():
+	"""Regression test for issue 52: Match.wildcards() on an alignment containing an indel."""
+	adapter = Adapter(
+		sequence='GAACTCCAGTCACNNNNN', where=BACK, max_error_rate=0.12,
+		min_overlap=5, read_wildcards=False, adapter_wildcards=True)
+	read = Sequence(name="abc", sequence='CCCCAGAACTACAGTCCCGGC')
+	match = Match(
+		astart=0, astop=17, rstart=5, rstop=21, matches=15, errors=2,
+		front=None, adapter=adapter, read=read)
+	# The ideal answer would be 'CGGC', because the correct alignment is
+	# this one:
+	#
+	# adapter         GAACTCCAGTCACNNNNN
+	# mismatches           X     X
+	# read       CCCCAGAACTACAGTC-CCGGC
+	#
+	# The alignment itself is not kept, so guessing 'GGC' is the best that
+	# can currently be done.
+	assert match.wildcards() == 'GGC'
+
+
+def test_issue_80():
+	"""Check the alignment reported for issue 80: three errors over the full adapter."""
+	# Issue 80 turned out not to be a defect in the alignment algorithm.
+	# The alignment below is chosen because it contains more matches than
+	# the 'obvious' one:
+	#
+	# TCGTATGCCGTCTTC
+	# =========X==XX=
+	# TCGTATGCCCTC--C
+	#
+	# That is correct, if a little surprising: an alignment without indels
+	# would have had only two errors.
+	adapter = Adapter(
+		sequence="TCGTATGCCGTCTTC", where=BACK, max_error_rate=0.2,
+		min_overlap=3, read_wildcards=False, adapter_wildcards=False)
+	read = Sequence(name="seq2", sequence="TCGTATGCCCTCC")
+	match = adapter.match_to(read)
+	# Checked in this order; the match object is the failure message.
+	for attribute, expected in (('errors', 3), ('astart', 0), ('astop', 15)):
+		assert getattr(match, attribute) == expected, match
+
+
+def test_str():
+	"""Smoke test: str() of adapters and of a match object must not raise."""
+	plain_adapter = Adapter('ACGT', where=BACK, max_error_rate=0.1)
+	str(plain_adapter)
+	match = plain_adapter.match_to(Sequence(name='seq', sequence='TTACGT'))
+	str(match)
+	colorspace_adapter = ColorspaceAdapter('0123', where=BACK, max_error_rate=0.1)
+	str(colorspace_adapter)
+
+
+@raises(ValueError)
+def test_color():
+	"""Constructing a ColorspaceAdapter with where=FRONT must raise ValueError."""
+	ColorspaceAdapter('0123', where=FRONT, max_error_rate=0.1)
+
+
+def test_parse_braces():
+	"""parse_braces() must expand each 'X{n}' into n copies of X."""
+	expansions = [
+		('', ''),
+		('A', 'A'),
+		('A{0}', ''),
+		('A{1}', 'A'),
+		('A{2}', 'AA'),
+		('A{2}C', 'AAC'),
+		('ACGTN{3}TGACCC', 'ACGTNNNTGACCC'),
+		('ACGTN{10}TGACCC', 'ACGTNNNNNNNNNNTGACCC'),
+		('ACGTN{3}TGA{4}CCC', 'ACGTNNNTGAAAACCC'),
+		('ACGTN{0}TGA{4}CCC', 'ACGTTGAAAACCC'),
+	]
+	for expression, expected in expansions:
+		assert parse_braces(expression) == expected
+
+
+def test_parse_braces_fail():
+	"""Every malformed brace expression must make parse_braces() raise ValueError."""
+	malformed = [
+		'{', '}', '{}', '{5', '{1}', 'A{-7}', 'A{', 'A{1', 'N{7', 'AN{7',
+		'A{4{}', 'A{4}{3}', 'A{b}', 'A{6X}', 'A{X6}',
+	]
+	for expression in malformed:
+		# assert_raises calls parse_braces(expression) itself.
+		assert_raises(ValueError, parse_braces, expression)
+
+
+def test_linked_adapter():
+	"""Trimming a LinkedAdapter('AAAA', 'TTTT') match leaves the middle and keeps the name."""
+	adapter = LinkedAdapter('AAAA', 'TTTT')
+	read = Sequence(name='seq', sequence='AAAACCCCCTTTT')
+	trimmed = adapter.trimmed(adapter.match_to(read))
+	assert trimmed.name == 'seq'
+	assert trimmed.sequence == 'CCCCC'
+
+
+def test_info_record():
+	"""Match.get_info_record() must return the expected 11-field tuple."""
+	adapter = Adapter(
+		sequence='GAACTCCAGTCACNNNNN', where=BACK, max_error_rate=0.12,
+		min_overlap=5, read_wildcards=False, adapter_wildcards=True,
+		name="Foo")
+	read = Sequence(name="abc", sequence='CCCCAGAACTACAGTCCCGGC')
+	am = Match(
+		astart=0, astop=17, rstart=5, rstop=21, matches=15, errors=2,
+		front=None, adapter=adapter, read=read)
+	expected_record = (
+		"abc", 2, 5, 21,
+		'CCCCA', 'GAACTACAGTCCCGGC', '',
+		'Foo',
+		'', '', '',
+	)
+	# Printed first so the actual record is visible in captured test output.
+	print(am.get_info_record())
+	assert am.get_info_record() == expected_record
diff --git a/tests/testalign.py b/tests/testalign.py
new file mode 100644
index 0000000..0ede180
--- /dev/null
+++ b/tests/testalign.py
@@ -0,0 +1,123 @@
+# coding: utf-8
+from __future__ import print_function, division, absolute_import
+
+from cutadapt.align import (locate, compare_prefixes, compare_suffixes,
+	Aligner)
+from cutadapt.adapters import BACK
+
+
+class TestAligner():
+	def test(self):
+		reference = 'CTCCAGCTTAGACATATC'
+		aligner = Aligner(reference, 0.1, flags=BACK)
+		aligner.locate('CC')
+
+	def test_100_percent_error_rate(self):
+		reference = 'GCTTAGACATATC'
+		aligner = Aligner(reference, 1.0, flags=BACK)
+		aligner.locate('CAA')
+
+
+def test_polya():
+	s = 'AAAAAAAAAAAAAAAAA'
+	t = 'ACAGAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
+	result = locate(s, t, 0.0, BACK)
+	#start_s, stop_s, start_t, stop_t, matches, cost = result
+	assert result == (0, len(s), 4, 4 + len(s), len(s), 0)
+
+
+# Sequences with IUPAC wildcards
+# R=A|G, Y=C|T, S=G|C, W=A|T, K=G|T, M=A|C, B=C|G|T, D=A|G|T, H=A|C|T, V=A|C|G,
+# N=A|C|G|T, X={}
+WILDCARD_SEQUENCES = [
+	'CCCATTGATC',  # original sequence without wildcards
+	'CCCRTTRATC',  # R=A|G
+	'YCCATYGATC',  # Y=C|T
+	'CSSATTSATC',  # S=G|C
+	'CCCWWWGATC',  # W=A|T
+	'CCCATKKATC',  # K=G|T
+	'CCMATTGMTC',  # M=A|C
+	'BCCATTBABC',  # B=C|G|T
+	'BCCATTBABC',  # B
+	'CCCDTTDADC',  # D=A|G|T
+	'CHCATHGATC',  # H=A|C|T
+	'CVCVTTVATC',  # V=A|C|G
+	'CCNATNGATC',  # N=A|C|G|T
+	'CCCNTTNATC',  # N
+#   'CCCXTTXATC',  # X
+]
+
+
+def test_compare_prefixes():
+	assert compare_prefixes('AAXAA', 'AAAAATTTTTTTTT') == (0, 5, 0, 5, 4, 1)
+	assert compare_prefixes('AANAA', 'AACAATTTTTTTTT', wildcard_ref=True) == (0, 5, 0, 5, 5, 0)
+	assert compare_prefixes('AANAA', 'AACAATTTTTTTTT', wildcard_ref=True) == (0, 5, 0, 5, 5, 0)
+	assert compare_prefixes('XAAAAA', 'AAAAATTTTTTTTT') == (0, 6, 0, 6, 4, 2)
+
+	a = WILDCARD_SEQUENCES[0]
+	for s in WILDCARD_SEQUENCES:
+		r = s + 'GCCAGGGTTGATTCGGCTGATCTGGCCG'
+		result = compare_prefixes(a, r, wildcard_query=True)
+		assert result == (0, 10, 0, 10, 10, 0), result
+
+		result = compare_prefixes(r, a, wildcard_ref=True)
+		assert result == (0, 10, 0, 10, 10, 0)
+
+	for s in WILDCARD_SEQUENCES:
+		for t in WILDCARD_SEQUENCES:
+			r = s + 'GCCAGGG'
+			result = compare_prefixes(s, r, )
+			assert result == (0, 10, 0, 10, 10, 0)
+
+			result = compare_prefixes(r, s, wildcard_ref=True, wildcard_query=True)
+			assert result == (0, 10, 0, 10, 10, 0)
+
+	r = WILDCARD_SEQUENCES[0] + 'GCCAGG'
+	for wildc_ref in (False, True):
+		for wildc_query in (False, True):
+			result = compare_prefixes('CCCXTTXATC', r, wildcard_ref=wildc_ref, wildcard_query=wildc_query)
+			assert result == (0, 10, 0, 10, 8, 2)
+
+
+def test_compare_suffixes():
+	assert compare_suffixes('AAXAA', 'TTTTTTTAAAAA') == (0, 5, 7, 12, 4, 1)
+	assert compare_suffixes('AANAA', 'TTTTTTTAACAA', wildcard_ref=True) == (0, 5, 7, 12, 5, 0)
+	assert compare_suffixes('AANAA', 'TTTTTTTAACAA', wildcard_ref=True) == (0, 5, 7, 12, 5, 0)
+	assert compare_suffixes('AAAAAX', 'TTTTTTTAAAAA') == (0, 6, 6, 12, 4, 2)
+
+
+def test_wildcards_in_adapter():
+	r = 'CATCTGTCC' + WILDCARD_SEQUENCES[0] + 'GCCAGGGTTGATTCGGCTGATCTGGCCG'
+	for a in WILDCARD_SEQUENCES:
+		result = locate(a, r, 0.0, BACK, wildcard_ref=True)
+		assert result == (0, 10, 9, 19, 10, 0), result
+
+	a = 'CCCXTTXATC'
+	result = locate(a, r, 0.0, BACK, wildcard_ref=True)
+	assert result is None
+
+
+def test_wildcards_in_read():
+	a = WILDCARD_SEQUENCES[0]
+	for s in WILDCARD_SEQUENCES:
+		r = 'CATCTGTCC' + s + 'GCCAGGGTTGATTCGGCTGATCTGGCCG'
+		result = locate(a, r, 0.0, BACK, wildcard_query=True)
+		if 'X' in s:
+			assert result is None
+		else:
+			assert result == (0, 10, 9, 19, 10, 0), result
+
+
+def test_wildcards_in_both():
+	for a in WILDCARD_SEQUENCES:
+		for s in WILDCARD_SEQUENCES:
+			if 'X' in s or 'X' in a:
+				continue
+			r = 'CATCTGTCC' + s + 'GCCAGGGTTGATTCGGCTGATCTGGCCG'
+			result = locate(a, r, 0.0, BACK, wildcard_ref=True, wildcard_query=True)
+			assert result == (0, 10, 9, 19, 10, 0), result
+
+
+def test_no_match():
+	a = locate('CTGATCTGGCCG', 'AAAAGGG', 0.1, BACK)
+	assert a is None, a
diff --git a/tests/testcolorspace.py b/tests/testcolorspace.py
new file mode 100644
index 0000000..16a9d88
--- /dev/null
+++ b/tests/testcolorspace.py
@@ -0,0 +1,140 @@
+# coding: utf-8
+from __future__ import print_function, division, absolute_import
+
+from cutadapt.colorspace import encode, decode
+from cutadapt.scripts.cutadapt import main
+from .utils import run, datapath
+
+# If there are any unknown characters in the test sequence,
+# round tripping will only work if all characters after the
+# first unknown character are also unknown:
+# encode("TNGN") == "T444", but
+# decode("T444") == "TNNN".
+
+sequences = [
+	"",
+	"C",
+	"ACGGTC",
+	"TN",
+	"TN.",
+	"TNN.N",
+	"CCGGCAGCATTCATTACGACAACGTGGCACCGTGTTTTCTCGGTGGTA",
+	"TGCAGTTGATGATCGAAGAAAACGACATCATCAGCCAGCAAGTGC",
+	"CAGGGTTTGATGAGTGGCTGTGGGTGCTGGCGTATCCGGG"
+	]
+
+
+def test_encode():
+	assert encode("AA") == "A0"
+	assert encode("AC") == "A1"
+	assert encode("AG") == "A2"
+	assert encode("AT") == "A3"
+	assert encode("CA") == "C1"
+	assert encode("CC") == "C0"
+	assert encode("CG") == "C3"
+	assert encode("CT") == "C2"
+	assert encode("GA") == "G2"
+	assert encode("GC") == "G3"
+	assert encode("GG") == "G0"
+	assert encode("GT") == "G1"
+	assert encode("TA") == "T3"
+	assert encode("TC") == "T2"
+	assert encode("TG") == "T1"
+	assert encode("TT") == "T0"
+
+	assert encode("TN") == "T4"
+	assert encode("NT") == "N4"
+	assert encode("NN") == "N4"
+
+	assert encode("ACGGTC") == "A13012"
+	assert encode("TTT.N") == "T0044"
+	assert encode("TTNT.N") == "T04444"
+
+
+def test_decode():
+	for s in sequences:
+		expected = s.replace('.', 'N')
+		encoded = encode(s)
+		assert decode(encoded) == expected
+	assert decode('A.') == 'AN'
+	assert decode('C.') == 'CN'
+	assert decode('G.') == 'GN'
+	assert decode('T.') == 'TN'
+
+
+def test_qualtrim_csfastaqual():
+	'''-q with csfasta/qual files'''
+	run("-c -q 10", "solidqual.fastq", "solid.csfasta", 'solid.qual')
+
+
+def test_E3M():
+	'''Read the E3M dataset'''
+	# not really colorspace, but a fasta/qual file pair
+	main(['-o', '/dev/null', datapath("E3M.fasta"), datapath("E3M.qual")])
+
+
+def test_bwa():
+	'''MAQ-/BWA-compatible output'''
+	run("-c -e 0.12 -a 330201030313112312 -x 552: --maq", "solidmaq.fastq", "solid.csfasta", 'solid.qual')
+
+
+def test_bfast():
+	'''BFAST-compatible output'''
+	run("-c -e 0.12 -a 330201030313112312 -x abc: --strip-f3", "solidbfast.fastq", "solid.csfasta", 'solid.qual')
+
+
+def test_trim_095():
+	'''some reads properly trimmed since cutadapt 0.9.5'''
+	run("-c -e 0.122 -a 330201030313112312", "solid.fasta", "solid.fasta")
+
+
+def test_solid():
+	run("-c -e 0.122 -a 330201030313112312", "solid.fastq", "solid.fastq")
+
+
+def test_solid_basespace_adapter():
+	'''colorspace adapter given in basespace'''
+	run("-c -e 0.122 -a CGCCTTGGCCGTACAGCAG", "solid.fastq", "solid.fastq")
+
+
+def test_solid5p():
+	'''test 5' colorspace adapter'''
+	# this is not a real adapter, just a random string
+	# in colorspace: C0302201212322332333
+	run("-c -e 0.1 --trim-primer -g CCGGAGGTCAGCTCGCTATA", "solid5p.fasta", "solid5p.fasta")
+
+
+def test_solid5p_prefix_notrim():
+	'''test anchored 5' colorspace adapter, no primer trimming'''
+	run("-c -e 0.1 -g ^CCGGAGGTCAGCTCGCTATA", "solid5p-anchored.notrim.fasta", "solid5p.fasta")
+
+
+def test_solid5p_prefix():
+	'''test anchored 5' colorspace adapter'''
+	run("-c -e 0.1 --trim-primer -g ^CCGGAGGTCAGCTCGCTATA", "solid5p-anchored.fasta", "solid5p.fasta")
+
+
+def test_solid5p_fastq():
+	'''test 5' colorspace adapter'''
+	# this is not a real adapter, just a random string
+	# in colorspace: C0302201212322332333
+	run("-c -e 0.1 --trim-primer -g CCGGAGGTCAGCTCGCTATA", "solid5p.fastq", "solid5p.fastq")
+
+
+def test_solid5p_prefix_notrim_fastq():
+	'''test anchored 5' colorspace adapter, no primer trimming'''
+	run("-c -e 0.1 -g ^CCGGAGGTCAGCTCGCTATA", "solid5p-anchored.notrim.fastq", "solid5p.fastq")
+
+
+def test_solid5p_prefix_fastq():
+	'''test anchored 5' colorspace adapter'''
+	run("-c -e 0.1 --trim-primer -g ^CCGGAGGTCAGCTCGCTATA", "solid5p-anchored.fastq", "solid5p.fastq")
+
+
+def test_sra_fastq():
+	'''test SRA-formatted colorspace FASTQ'''
+	run("-c -e 0.1 --format sra-fastq -a CGCCTTGGCCGTACAGCAG", "sra.fastq", "sra.fastq")
+
+
+def test_no_zero_cap():
+	run("--no-zero-cap -c -e 0.122 -a CGCCTTGGCCGTACAGCAG", "solid-no-zerocap.fastq", "solid.fastq")
diff --git a/tests/testfilters.py b/tests/testfilters.py
new file mode 100644
index 0000000..3976e72
--- /dev/null
+++ b/tests/testfilters.py
@@ -0,0 +1,42 @@
+# coding: utf-8
+"""
+Tests write output (should it return True or False or write)
+"""
+from __future__ import print_function, division, absolute_import
+
+from cutadapt.filters import NContentFilter, DISCARD, KEEP, LegacyPairedRedirector, PairedRedirector
+from cutadapt.seqio import Sequence
+
+def test_ncontentfilter():
+	# third parameter is True if read should be discarded
+	params = [
+		('AAA', 0, KEEP),
+		('AAA', 1, KEEP),
+		('AAACCTTGGN', 1, KEEP),
+		('AAACNNNCTTGGN', 0.5, KEEP),
+		('NNNNNN', 1, DISCARD),
+		('ANAAAA', 1/6, KEEP),
+		('ANAAAA', 0, DISCARD)
+	]
+	for seq, count, expected in params:
+		filter = NContentFilter(count=count)
+		_seq = Sequence('read1', seq, qualities='#'*len(seq))
+		assert filter(_seq) == expected
+
+
+def test_ncontentfilter_paired():
+	params = [
+		('AAA', 'AAA', 0, KEEP),
+		('AAAN', 'AAA', 0, DISCARD),
+		('AAA', 'AANA', 0, DISCARD),
+		('ANAA', 'AANA', 1, KEEP),
+	]
+	for seq1, seq2, count, expected in params:
+		filter = NContentFilter(count=count)
+		filter_legacy = LegacyPairedRedirector(None, filter)
+		filter_both = PairedRedirector(None, filter)
+		read1 = Sequence('read1', seq1, qualities='#'*len(seq1))
+		read2 = Sequence('read1', seq2, qualities='#'*len(seq2))
+		assert filter_legacy(read1, read2) == filter(read1)
+		# discard entire pair if one of the reads fulfills criteria
+		assert filter_both(read1, read2) == expected
diff --git a/tests/testmodifiers.py b/tests/testmodifiers.py
new file mode 100644
index 0000000..5101755
--- /dev/null
+++ b/tests/testmodifiers.py
@@ -0,0 +1,36 @@
+# coding: utf-8
+from __future__ import print_function, division, absolute_import
+
+from cutadapt.seqio import Sequence
+from cutadapt.modifiers import UnconditionalCutter, NEndTrimmer, QualityTrimmer
+
+def test_unconditional_cutter():
+	uc = UnconditionalCutter(length=5)
+	s = 'abcdefg'
+	assert UnconditionalCutter(length=2)(s) == 'cdefg'
+	assert UnconditionalCutter(length=-2)(s) == 'abcde'
+	assert UnconditionalCutter(length=100)(s) == ''
+	assert UnconditionalCutter(length=-100)(s) == ''
+
+
+def test_nend_trimmer():
+	trimmer = NEndTrimmer()
+	seqs = ['NNNNAAACCTTGGNNN', 'NNNNAAACNNNCTTGGNNN', 'NNNNNN']
+	trims = ['AAACCTTGG', 'AAACNNNCTTGG', '']
+	for seq, trimmed in zip(seqs, trims):
+		_seq = Sequence('read1', seq, qualities='#'*len(seq))
+		_trimmed = Sequence('read1', trimmed, qualities='#'*len(trimmed))
+		assert trimmer(_seq) == _trimmed
+
+
+def test_quality_trimmer():
+	read = Sequence('read1', 'ACGTTTACGTA', '##456789###')
+
+	qt = QualityTrimmer(10, 10, 33)
+	assert qt(read) == Sequence('read1', 'GTTTAC', '456789')
+
+	qt = QualityTrimmer(0, 10, 33)
+	assert qt(read) == Sequence('read1', 'ACGTTTAC', '##456789')
+
+	qt = QualityTrimmer(10, 0, 33)
+	assert qt(read) == Sequence('read1', 'GTTTACGTA', '456789###')
diff --git a/tests/testpaired.py b/tests/testpaired.py
new file mode 100644
index 0000000..51cc1d0
--- /dev/null
+++ b/tests/testpaired.py
@@ -0,0 +1,273 @@
+# coding: utf-8
+from __future__ import print_function, division, absolute_import
+
+import shutil
+from nose.tools import raises
+from cutadapt.scripts import cutadapt
+from .utils import run, files_equal, datapath, cutpath, redirect_stderr, temporary_path
+
+def run_paired(params, in1, in2, expected1, expected2):
+	if type(params) is str:
+		params = params.split()
+	with temporary_path('tmp1-' + expected1) as p1:
+		with temporary_path('tmp2-' + expected2) as p2:
+			params += ['-o', p1, '-p', p2]
+			params += [datapath(in1), datapath(in2)]
+			assert cutadapt.main(params) is None
+			assert files_equal(cutpath(expected1), p1)
+			assert files_equal(cutpath(expected2), p2)
+
+
+def run_interleaved(params, inpath, expected):
+	if type(params) is str:
+		params = params.split()
+	with temporary_path(expected) as tmp:
+		params += ['--interleaved', '-o', tmp, datapath(inpath)]
+		assert cutadapt.main(params) is None
+		assert files_equal(cutpath(expected), tmp)
+
+
+def run_interleaved2(params, inpath, expected1, expected2):
+	assert False  # unused function
+	if type(params) is str:
+		params = params.split()
+	with temporary_path('tmp1-' + expected1) as p1:
+		with temporary_path('tmp2-' + expected2) as p2:
+			params += ['--interleaved', '-o', p1, '-p', p2]
+		params += [datapath(inpath)]
+		assert cutadapt.main(params) is None
+		assert files_equal(cutpath(expected), p1)
+		assert files_equal(cutpath(expected), p2)
+
+
+def test_paired_separate():
+	'''test separate trimming of paired-end reads'''
+	run('-a TTAGACATAT', 'paired-separate.1.fastq', 'paired.1.fastq')
+	run('-a CAGTGGAGTA', 'paired-separate.2.fastq', 'paired.2.fastq')
+
+
+def test_paired_end_legacy():
+	'''--paired-output, not using -A/-B/-G'''
+	# the -m 14 filters out one read, which should then also be filtered out in the second output file
+	# -q 10 should not change anything: qualities in file 1 are high enough,
+	# qualities in file 2 should not be inspected.
+	run_paired('-a TTAGACATAT -m 14 -q 10',
+		in1='paired.1.fastq', in2='paired.2.fastq',
+		expected1='paired.m14.1.fastq', expected2='paired.m14.2.fastq'
+	)
+
+
+def test_untrimmed_paired_output():
+	with temporary_path("tmp-untrimmed.1.fastq") as untrimmed1:
+		with temporary_path("tmp-untrimmed.2.fastq") as untrimmed2:
+			run_paired(['-a', 'TTAGACATAT',
+				'--untrimmed-output', untrimmed1,
+				'--untrimmed-paired-output', untrimmed2],
+				in1='paired.1.fastq', in2='paired.2.fastq',
+				expected1='paired-trimmed.1.fastq', expected2='paired-trimmed.2.fastq'
+			)
+			assert files_equal(cutpath('paired-untrimmed.1.fastq'), untrimmed1)
+			assert files_equal(cutpath('paired-untrimmed.2.fastq'), untrimmed2)
+
+
+def test_explicit_format_with_paired():
+	# Use --format=fastq with input files whose extension is .txt
+	with temporary_path("paired.1.txt") as txt1:
+		with temporary_path("paired.2.txt") as txt2:
+			shutil.copyfile(datapath("paired.1.fastq"), txt1)
+			shutil.copyfile(datapath("paired.2.fastq"), txt2)
+			run_paired('--format=fastq -a TTAGACATAT -m 14',
+				in1=txt1, in2=txt2,
+				expected1='paired.m14.1.fastq',
+				expected2='paired.m14.2.fastq'
+			)
+
+
+def test_no_trimming_legacy():
+	# make sure that this doesn't divide by zero
+	cutadapt.main(['-a', 'XXXXX', '-o', '/dev/null', '-p', '/dev/null', datapath('paired.1.fastq'), datapath('paired.2.fastq')])
+
+
+def test_no_trimming():
+	# make sure that this doesn't divide by zero
+	cutadapt.main(['-a', 'XXXXX', '-A', 'XXXXX', '-o', '/dev/null', '-p', '/dev/null', datapath('paired.1.fastq'), datapath('paired.2.fastq')])
+
+
+@raises(SystemExit)
+def test_missing_file():
+	with redirect_stderr():
+		cutadapt.main(['-a', 'XX', '--paired-output', 'out.fastq', datapath('paired.1.fastq')])
+
+
+@raises(SystemExit)
+def test_first_too_short():
+	with temporary_path("truncated.1.fastq") as trunc1:
+		# Create a truncated file in which the last read is missing
+		with open(datapath('paired.1.fastq')) as f:
+			lines = f.readlines()
+			lines = lines[:-4]
+		with open(trunc1, 'w') as f:
+			f.writelines(lines)
+		with redirect_stderr():
+			cutadapt.main('-a XX --paired-output out.fastq'.split() + [trunc1, datapath('paired.2.fastq')])
+
+
+@raises(SystemExit)
+def test_second_too_short():
+	with temporary_path("truncated.2.fastq") as trunc2:
+		# Create a truncated file in which the last read is missing
+		with open(datapath('paired.2.fastq')) as f:
+			lines = f.readlines()
+			lines = lines[:-4]
+		with open(trunc2, 'w') as f:
+			f.writelines(lines)
+		with redirect_stderr():
+			cutadapt.main('-a XX --paired-output out.fastq'.split() + [datapath('paired.1.fastq'), trunc2])
+
+
+@raises(SystemExit)
+def test_unmatched_read_names():
+	with temporary_path("swapped.1.fastq") as swapped:
+		# Create a file in which reads 2 and 3 are swapped
+		with open(datapath('paired.1.fastq')) as f:
+			lines = f.readlines()
+			lines = lines[0:4] + lines[8:12] + lines[4:8] + lines[12:]
+		with open(swapped, 'w') as f:
+			f.writelines(lines)
+		with redirect_stderr():
+			cutadapt.main('-a XX -o out1.fastq --paired-output out2.fastq'.split() + [swapped, datapath('paired.2.fastq')])
+
+
+def test_legacy_minlength():
+	'''Ensure -m is not applied to second read in a pair in legacy mode'''
+	run_paired('-a XXX -m 27',
+		in1='paired.1.fastq', in2='paired.2.fastq',
+		expected1='paired-m27.1.fastq', expected2='paired-m27.2.fastq'
+	)
+
+
+def test_paired_end():
+	'''single-pass paired-end with -m'''
+	run_paired('-a TTAGACATAT -A CAGTGGAGTA -m 14',
+		in1='paired.1.fastq', in2='paired.2.fastq',
+		expected1='paired.1.fastq', expected2='paired.2.fastq'
+	)
+
+
+def test_paired_anchored_back_no_indels():
+	run_paired("-a BACKADAPTER$ -A BACKADAPTER$ -N --no-indels",
+		in1='anchored-back.fasta', in2='anchored-back.fasta',
+		expected1='anchored-back.fasta', expected2="anchored-back.fasta"
+	)
+
+
+def test_paired_end_qualtrim():
+	'''single-pass paired-end with -q and -m'''
+	run_paired('-q 20 -a TTAGACATAT -A CAGTGGAGTA -m 14 -M 90',
+		in1='paired.1.fastq', in2='paired.2.fastq',
+		expected1='pairedq.1.fastq', expected2='pairedq.2.fastq'
+	)
+
+
+def test_paired_end_qualtrim_swapped():
+	'''single-pass paired-end with -q and -m, but files swapped'''
+	run_paired('-q 20 -a CAGTGGAGTA -A TTAGACATAT -m 14',
+		in1='paired.2.fastq', in2='paired.1.fastq',
+		expected1='pairedq.2.fastq', expected2='pairedq.1.fastq'
+	)
+
+
+def test_paired_end_cut():
+	run_paired('-u 3 -u -1 -U 4 -U -2',
+		in1='paired.1.fastq', in2='paired.2.fastq',
+		expected1='pairedu.1.fastq', expected2='pairedu.2.fastq'
+	)
+
+
+def test_paired_end_A_only():
+	run_paired('-A CAGTGGAGTA',
+		in1='paired.1.fastq', in2='paired.2.fastq',
+		expected1='paired-onlyA.1.fastq', expected2='paired-onlyA.2.fastq'
+	)
+
+
+def test_discard_untrimmed():
+	# issue #146
+	# the first adapter is a sequence cut out from the first read
+	run_paired('-a CTCCAGCTTAGACATATC -A XXXXXXXX --discard-untrimmed',
+		in1='paired.1.fastq', in2='paired.2.fastq',
+		expected1='empty.fastq', expected2='empty.fastq'
+	)
+
+
+def test_discard_trimmed():
+	run_paired('-A C -O 1 --discard-trimmed',  # applies everywhere
+		in1='paired.1.fastq', in2='paired.2.fastq',
+		expected1='empty.fastq', expected2='empty.fastq'
+	)
+
+
+def test_interleaved():
+	'''single-pass interleaved paired-end with -q and -m'''
+	run_interleaved('-q 20 -a TTAGACATAT -A CAGTGGAGTA -m 14 -M 90',
+		inpath='interleaved.fastq', expected='interleaved.fastq'
+	)
+
+
+@raises(SystemExit)
+def test_interleaved_no_paired_output():
+	with temporary_path("temp-paired.1.fastq") as p1:
+		with temporary_path("temp-paired.2.fastq") as p2:
+			params = '-a XX --interleaved'.split()
+			with redirect_stderr():
+				params += [ '-o', p1, '-p1', p2, 'paired.1.fastq', 'paired.2.fastq']
+				cutadapt.main(params)
+
+"""
+def test_interleaved_input_paired_output():
+	'''single-pass interleaved paired-end with -q and -m, paired output'''
+	run_interleaved2('-q 20 -a TTAGACATAT -A CAGTGGAGTA -m 14 -M 90',
+		inpath='interleaved.fastq', expected1='pairedq1.fastq', expected2='pairedq2.fastq'
+	)
+"""
+
+
+def test_pair_filter():
+	run_paired('--pair-filter=both -a TTAGACATAT -A GGAGTA -m 14',
+		in1='paired.1.fastq', in2='paired.2.fastq',
+		expected1='paired-filterboth.1.fastq', expected2='paired-filterboth.2.fastq'
+	)
+
+
+def test_too_short_paired_output():
+	with temporary_path("temp-too-short.1.fastq") as p1:
+		with temporary_path("temp-too-short.2.fastq") as p2:
+			run_paired('-a TTAGACATAT -A CAGTGGAGTA -m 14 --too-short-output '
+				'{0} --too-short-paired-output {1}'.format(p1, p2),
+				in1='paired.1.fastq', in2='paired.2.fastq',
+				expected1='paired.1.fastq', expected2='paired.2.fastq'
+			)
+			assert files_equal(cutpath('paired-too-short.1.fastq'), p1)
+			assert files_equal(cutpath('paired-too-short.2.fastq'), p2)
+
+
+def test_too_long_output():
+	with temporary_path("temp-too-long.1.fastq") as p1:
+		with temporary_path("temp-too-long.2.fastq") as p2:
+			run_paired('-a TTAGACATAT -A CAGTGGAGTA -M 14 --too-long-output '
+				'{0} --too-long-paired-output {1}'.format(p1, p2),
+				in1='paired.1.fastq', in2='paired.2.fastq',
+				expected1='paired-too-short.1.fastq', expected2='paired-too-short.2.fastq'
+			)
+			assert files_equal(cutpath('paired.1.fastq'), p1)
+			assert files_equal(cutpath('paired.2.fastq'), p2)
+
+
+@raises(SystemExit)
+def test_too_short_output_paired_option_missing():
+	with temporary_path("temp-too-short.1.fastq") as p1:
+		run_paired('-a TTAGACATAT -A CAGTGGAGTA -m 14 --too-short-output '
+			'{0}'.format(p1),
+			in1='paired.1.fastq', in2='paired.2.fastq',
+			expected1='paired.1.fastq', expected2='paired.2.fastq'
+		)
diff --git a/tests/testqualtrim.py b/tests/testqualtrim.py
new file mode 100644
index 0000000..173b264
--- /dev/null
+++ b/tests/testqualtrim.py
@@ -0,0 +1,14 @@
+# coding: utf-8
+from __future__ import print_function, division, absolute_import
+
+from cutadapt.seqio import Sequence
+from cutadapt.qualtrim import nextseq_trim_index
+
+def test_nextseq_trim():
+	s = Sequence('n', '', '')
+	assert nextseq_trim_index(s, cutoff=22) == 0
+	s = Sequence('n',
+		'TCTCGTATGCCGTCTTATGCTTGAAAAAAAAAAGGGGGGGGGGGGGGGGGNNNNNNNNNNNGGNGG',
+		'AA//EAEE//A6///E//A//EA/EEEEEEAEA//EEEEEEEEEEEEEEE###########EE#EA'
+	)
+	assert nextseq_trim_index(s, cutoff=22) == 33
diff --git a/tests/tests.py b/tests/tests.py
new file mode 100644
index 0000000..241e169
--- /dev/null
+++ b/tests/tests.py
@@ -0,0 +1,383 @@
+# coding: utf-8
+# TODO
+# test with the --output option
+# test reading from standard input
+from __future__ import print_function, division, absolute_import
+
+import os
+import sys
+from nose.tools import raises
+from cutadapt.scripts import cutadapt
+from cutadapt.compat import StringIO
+from .utils import run, files_equal, datapath, cutpath, redirect_stderr, temporary_path
+
+def test_example():
+	run('-N -b ADAPTER', 'example.fa', 'example.fa')
+
+def test_small():
+	run('-b TTAGACATATCTCCGTCG', 'small.fastq', 'small.fastq')
+
+def test_empty():
+	'''empty input'''
+	run('-a TTAGACATATCTCCGTCG', 'empty.fastq', 'empty.fastq')
+
+def test_newlines():
+	'''DOS/Windows newlines'''
+	run('-e 0.12 -b TTAGACATATCTCCGTCG', 'dos.fastq', 'dos.fastq')
+
+def test_lowercase():
+	'''lowercase adapter'''
+	run('-b ttagacatatctccgtcg', 'lowercase.fastq', 'small.fastq')
+
+
+def test_rest():
+	'''-r/--rest-file'''
+	with temporary_path('rest.tmp') as rest_tmp:
+		run(['-b', 'ADAPTER', '-N', '-r', rest_tmp], "rest.fa", "rest.fa")
+		assert files_equal(datapath('rest.txt'), rest_tmp)
+
+
+def test_restfront():
+	with temporary_path("rest.txt") as path:
+		run(['-g', 'ADAPTER', '-N', '-r', path], "restfront.fa", "rest.fa")
+		assert files_equal(datapath('restfront.txt'), path)
+
+
+def test_discard():
+	'''--discard'''
+	run("-b TTAGACATATCTCCGTCG --discard", "discard.fastq", "small.fastq")
+
+
+def test_discard_untrimmed():
+	'''--discard-untrimmed'''
+	run('-b CAAGAT --discard-untrimmed', 'discard-untrimmed.fastq', 'small.fastq')
+
+
+def test_plus():
+	'''test if sequence name after the "+" is retained'''
+	run("-e 0.12 -b TTAGACATATCTCCGTCG", "plus.fastq", "plus.fastq")
+
+
+def test_extensiontxtgz():
+	'''automatic recognition of "_sequence.txt.gz" extension'''
+	run("-b TTAGACATATCTCCGTCG", "s_1_sequence.txt", "s_1_sequence.txt.gz")
+
+
+def test_format():
+	'''the -f/--format parameter'''
+	run("-f fastq -b TTAGACATATCTCCGTCG", "small.fastq", "small.myownextension")
+
+
+def test_minimum_length():
+	'''-m/--minimum-length'''
+	run("-c -m 5 -a 330201030313112312", "minlen.fa", "lengths.fa")
+
+
+def test_too_short():
+	'''--too-short-output'''
+	run("-c -m 5 -a 330201030313112312 --too-short-output tooshort.tmp.fa", "minlen.fa", "lengths.fa")
+	assert files_equal(datapath('tooshort.fa'), "tooshort.tmp.fa")
+	os.remove('tooshort.tmp.fa')
+
+
+def test_too_short_no_primer():
+	'''--too-short-output and --trim-primer'''
+	run("-c -m 5 -a 330201030313112312 --trim-primer --too-short-output tooshort.tmp.fa", "minlen.noprimer.fa", "lengths.fa")
+	assert files_equal(datapath('tooshort.noprimer.fa'), "tooshort.tmp.fa")
+	os.remove('tooshort.tmp.fa')
+
+
+def test_maximum_length():
+	'''-M/--maximum-length'''
+	run("-c -M 5 -a 330201030313112312", "maxlen.fa", "lengths.fa")
+
+
+def test_too_long():
+	'''--too-long-output'''
+	run("-c -M 5 --too-long-output toolong.tmp.fa -a 330201030313112312", "maxlen.fa", "lengths.fa")
+	assert files_equal(datapath('toolong.fa'), "toolong.tmp.fa")
+	os.remove('toolong.tmp.fa')
+
+
+def test_length_tag():
+	'''454 data; -n and --length-tag'''
+	run("-n 3 -e 0.1 --length-tag length= " \
+		"-b TGAGACACGCAACAGGGGAAAGGCAAGGCACACAGGGGATAGG "\
+		"-b TCCATCTCATCCCTGCGTGTCCCATCTGTTCCCTCCCTGTCTCA", '454.fa', '454.fa')
+
+def test_overlap_a():
+	'''-O/--overlap with -a (-c omitted on purpose)'''
+	run("-O 10 -a 330201030313112312 -e 0.0 -N", "overlapa.fa", "overlapa.fa")
+
+def test_overlap_b():
+	'''-O/--overlap with -b'''
+	run("-O 10 -b TTAGACATATCTCCGTCG -N", "overlapb.fa", "overlapb.fa")
+
+def test_qualtrim():
+	'''-q with low qualities'''
+	run("-q 10 -a XXXXXX", "lowqual.fastq", "lowqual.fastq")
+
+def test_qualbase():
+	'''-q with low qualities, using ascii(quality+64) encoding'''
+	run("-q 10 --quality-base 64 -a XXXXXX", "illumina64.fastq", "illumina64.fastq")
+
+def test_quality_trim_only():
+	'''only trim qualities, do not remove adapters'''
+	run("-q 10 --quality-base 64", "illumina64.fastq", "illumina64.fastq")
+
+def test_twoadapters():
+	'''two adapters'''
+	run("-a AATTTCAGGAATT -a GTTCTCTAGTTCT", "twoadapters.fasta", "twoadapters.fasta")
+
+def test_polya():
+	'''poly-A tails'''
+	run("-m 24 -O 10 -a AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA", "polya.fasta", "polya.fasta")
+
+def test_polya_brace_notation():
+	'''poly-A tails'''
+	run("-m 24 -O 10 -a A{35}", "polya.fasta", "polya.fasta")
+
+def test_mask_adapter():
+	'''mask adapter with N (reads maintain the same length)'''
+	run("-b CAAG -n 3 --mask-adapter", "anywhere_repeat.fastq", "anywhere_repeat.fastq")
+
+def test_gz_multiblock():
+	'''compressed gz file with multiple blocks (created by concatenating two .gz files)'''
+	run("-b TTAGACATATCTCCGTCG", "small.fastq", "multiblock.fastq.gz")
+
+def test_suffix():
+	'''-y/--suffix parameter, combined with _F3'''
+	run("-c -e 0.12 -a 1=330201030313112312 -y _my_suffix_{name} --strip-f3", "suffix.fastq", "solid.csfasta", 'solid.qual')
+
+def test_read_wildcard():
+	'''test wildcards in reads'''
+	run("--match-read-wildcards -b ACGTACGT", "wildcard.fa", "wildcard.fa")
+
+def test_adapter_wildcard():
+	'''wildcards in adapter'''
+	for adapter_type, expected in (
+			("-a", "wildcard_adapter.fa"),
+			("-b", "wildcard_adapter_anywhere.fa")):
+		with temporary_path("wildcardtmp.txt") as wildcardtmp:
+			run("--wildcard-file {0} {1} ACGTNNNACGT".format(wildcardtmp, adapter_type),
+				expected, "wildcard_adapter.fa")
+			with open(wildcardtmp) as wct:
+				lines = wct.readlines()
+			lines = [ line.strip() for line in lines ]
+			assert lines == ['AAA 1', 'GGG 2', 'CCC 3b', 'TTT 4b']
+
+def test_wildcard_N():
+	'''test 'N' wildcard matching with no allowed errors'''
+	run("-e 0 -a GGGGGGG --match-read-wildcards", "wildcardN.fa", "wildcardN.fa")
+
+def test_illumina_adapter_wildcard():
+	run("-a VCCGAMCYUCKHRKDCUBBCNUWNSGHCGU", "illumina.fastq", "illumina.fastq.gz")
+
+def test_adapter_front():
+	'''test adapter in front'''
+	run("--front ADAPTER -N", "examplefront.fa", "example.fa")
+
+def test_literal_N():
+	'''test matching literal 'N's'''
+	run("-N -e 0.2 -a NNNNNNNNNNNNNN", "trimN3.fasta", "trimN3.fasta")
+
+def test_literal_N2():
+	run("-N -O 1 -g NNNNNNNNNNNNNN", "trimN5.fasta", "trimN5.fasta")
+
+def test_literal_N_brace_notation():
+	'''test matching literal 'N's'''
+	run("-N -e 0.2 -a N{14}", "trimN3.fasta", "trimN3.fasta")
+
+def test_literal_N2_brace_notation():
+	run("-N -O 1 -g N{14}", "trimN5.fasta", "trimN5.fasta")
+
+def test_anchored_front():
+	run("-g ^FRONTADAPT -N", "anchored.fasta", "anchored.fasta")
+
+def test_anchored_front_ellipsis_notation():
+	run("-a FRONTADAPT... -N", "anchored.fasta", "anchored.fasta")
+
+def test_anchored_back():
+	run("-a BACKADAPTER$ -N", "anchored-back.fasta", "anchored-back.fasta")
+
+def test_anchored_back_no_indels():
+	run("-a BACKADAPTER$ -N --no-indels", "anchored-back.fasta", "anchored-back.fasta")
+
+
+def test_no_indels():
+	run('-a TTAGACATAT -g GAGATTGCCA --no-indels', 'no_indels.fasta', 'no_indels.fasta')
+
+
+def test_issue_46():
+	'''issue 46 - IndexError with --wildcard-file'''
+	with temporary_path("wildcardtmp.txt") as wildcardtmp:
+		run("--anywhere=AACGTN --wildcard-file={0}".format(wildcardtmp), "issue46.fasta", "issue46.fasta")
+
+def test_strip_suffix():
+	run("--strip-suffix _sequence -a XXXXXXX", "stripped.fasta", "simple.fasta")
+
+
+def test_info_file():
+	# The true adapter sequence in the illumina.fastq.gz data set is
+	# GCCTAACTTCTTAGACTGCCTTAAGGACGT (fourth base is different)
+	#
+	with temporary_path("infotmp.txt") as infotmp:
+		run(["--info-file", infotmp, '-a', 'adapt=GCCGAACTTCTTAGACTGCCTTAAGGACGT'], "illumina.fastq", "illumina.fastq.gz")
+		assert files_equal(cutpath('illumina.info.txt'), infotmp)
+
+
+def test_info_file_times():
+	with temporary_path("infotmp.txt") as infotmp:
+		run(["--info-file", infotmp, '--times', '2', '-a', 'adapt=GCCGAACTTCTTA', '-a', 'adapt2=GACTGCCTTAAGGACGT'], "illumina5.fastq", "illumina5.fastq")
+		assert files_equal(cutpath('illumina5.info.txt'), infotmp)
+
+
+def test_info_file_fasta():
+	with temporary_path("infotmp.txt") as infotmp:
+		# Just make sure that it runs
+		run(['--info-file', infotmp, '-a', 'TTAGACATAT', '-g', 'GAGATTGCCA', '--no-indels'], 'no_indels.fasta', 'no_indels.fasta')
+
+
+def test_named_adapter():
+	run("-a MY_ADAPTER=GCCGAACTTCTTAGACTGCCTTAAGGACGT", "illumina.fastq", "illumina.fastq.gz")
+
+
+def test_adapter_with_U():
+	run("-a GCCGAACUUCUUAGACUGCCUUAAGGACGU", "illumina.fastq", "illumina.fastq.gz")
+
+
+def test_no_trim():
+	''' --no-trim '''
+	run("--no-trim --discard-untrimmed -a CCCTAGTTAAAC", 'no-trim.fastq', 'small.fastq')
+
+
+def test_bzip2():
+	'''test bzip2 support'''
+	run('-b TTAGACATATCTCCGTCG', 'small.fastq', 'small.fastq.bz2')
+
+
+try:
+	import lzma
+
+	def test_xz():
+		'''test xz support'''
+		run('-b TTAGACATATCTCCGTCG', 'small.fastq', 'small.fastq.xz')
+except ImportError:
+	pass
+
+
+@raises(SystemExit)
+def test_qualfile_only():
+	with redirect_stderr():
+		cutadapt.main(['file.qual'])
+
+
+@raises(SystemExit)
+def test_no_args():
+	with redirect_stderr():
+		cutadapt.main([])
+
+
+@raises(SystemExit)
+def test_two_fastqs():
+	with redirect_stderr():
+		cutadapt.main([datapath('paired.1.fastq'), datapath('paired.2.fastq')])
+
+
+def test_anchored_no_indels():
+	'''anchored 5' adapter, mismatches only (no indels)'''
+	run('-g ^TTAGACATAT --no-indels -e 0.1', 'anchored_no_indels.fasta', 'anchored_no_indels.fasta')
+
+
+def test_anchored_no_indels_wildcard_read():
+	'''anchored 5' adapter, mismatches only (no indels), but wildcards in the read count as matches'''
+	run('-g ^TTAGACATAT --match-read-wildcards --no-indels -e 0.1', 'anchored_no_indels_wildcard.fasta', 'anchored_no_indels.fasta')
+
+
+def test_anchored_no_indels_wildcard_adapt():
+	'''anchored 5' adapter, mismatches only (no indels), but wildcards in the adapter count as matches'''
+	run('-g ^TTAGACANAT --no-indels -e 0.1', 'anchored_no_indels.fasta', 'anchored_no_indels.fasta')
+
+
+def test_unconditional_cut_front():
+	run('-u 5', 'unconditional-front.fastq', 'small.fastq')
+
+
+def test_unconditional_cut_back():
+	run('-u -5', 'unconditional-back.fastq', 'small.fastq')
+
+
+def test_unconditional_cut_both():
+	run('-u -5 -u 5', 'unconditional-both.fastq', 'small.fastq')
+
+
+def test_untrimmed_output():
+	with temporary_path('untrimmed.tmp.fastq') as tmp:
+		run(['-a', 'TTAGACATATCTCCGTCG', '--untrimmed-output', tmp], 'small.trimmed.fastq', 'small.fastq')
+		assert files_equal(cutpath('small.untrimmed.fastq'), tmp)
+
+
+def test_adapter_file():
+	run('-a file:' + datapath('adapter.fasta'), 'illumina.fastq', 'illumina.fastq.gz')
+
+def test_adapter_file_5p_anchored():
+	run('-N -g file:' + datapath('prefix-adapter.fasta'), 'anchored.fasta', 'anchored.fasta')
+
+def test_adapter_file_3p_anchored():
+	run('-N -a file:' + datapath('suffix-adapter.fasta'), 'anchored-back.fasta', 'anchored-back.fasta')
+
+
+def test_adapter_file_5p_anchored_no_indels():
+	run('-N --no-indels -g file:' + datapath('prefix-adapter.fasta'), 'anchored.fasta', 'anchored.fasta')
+
+
+def test_adapter_file_3p_anchored_no_indels():
+	run('-N --no-indels -a file:' + datapath('suffix-adapter.fasta'), 'anchored-back.fasta', 'anchored-back.fasta')
+
+
+def test_demultiplex():
+	multiout = os.path.join(os.path.dirname(__file__), 'data', 'tmp-demulti.{name}.fasta')
+	params = ['-a', 'first=AATTTCAGGAATT', '-a', 'second=GTTCTCTAGTTCT', '-o', multiout, datapath('twoadapters.fasta')]
+	assert cutadapt.main(params) is None
+	assert files_equal(cutpath('twoadapters.first.fasta'), multiout.format(name='first'))
+	assert files_equal(cutpath('twoadapters.second.fasta'), multiout.format(name='second'))
+	assert files_equal(cutpath('twoadapters.unknown.fasta'), multiout.format(name='unknown'))
+	os.remove(multiout.format(name='first'))
+	os.remove(multiout.format(name='second'))
+	os.remove(multiout.format(name='unknown'))
+
+
+def test_max_n():
+	run('--max-n 0', 'maxn0.fasta', 'maxn.fasta')
+	run('--max-n 1', 'maxn1.fasta', 'maxn.fasta')
+	run('--max-n 2', 'maxn2.fasta', 'maxn.fasta')
+	run('--max-n 0.2', 'maxn0.2.fasta', 'maxn.fasta')
+	run('--max-n 0.4', 'maxn0.4.fasta', 'maxn.fasta')
+
+
+def test_quiet_is_quiet():
+	captured_standard_output = StringIO()
+	captured_standard_error = StringIO()
+	try:
+		old_stdout = sys.stdout
+		old_stderr = sys.stderr
+		sys.stdout = captured_standard_output
+		sys.stderr = captured_standard_error
+		cutadapt.main(['-o', '/dev/null', '--quiet', '-a', 'XXXX', datapath('illumina.fastq.gz')])
+	finally:
+		sys.stdout = old_stdout
+		sys.stderr = old_stderr
+	assert captured_standard_output.getvalue() == ''
+	assert captured_standard_error.getvalue() == ''
+
+
+def test_nextseq():
+	run('--nextseq-trim 22', 'nextseq.fastq', 'nextseq.fastq')
+
+
+def test_linked():
+	run('-a AAAAAAAAAA...TTTTTTTTTT', 'linked.fasta', 'linked.fasta')
+
+
+def test_fasta():
+	run('-a TTAGACATATCTCCGTCG', 'small.fasta', 'small.fastq')
diff --git a/tests/testseqio.py b/tests/testseqio.py
new file mode 100644
index 0000000..ef8b0b6
--- /dev/null
+++ b/tests/testseqio.py
@@ -0,0 +1,352 @@
+# coding: utf-8
+from __future__ import print_function, division, absolute_import
+
+import sys
+import os
+import shutil
+from textwrap import dedent
+from nose.tools import raises
+from tempfile import mkdtemp
+from cutadapt.seqio import (Sequence, ColorspaceSequence, FormatError,
+	FastaReader, FastqReader, FastaQualReader, InterleavedSequenceReader,
+	FastaWriter, FastqWriter, InterleavedSequenceWriter, open as openseq,
+	sequence_names_match)
+from cutadapt.compat import StringIO
+
+
+# files tests/data/simple.fast{q,a}
+simple_fastq = [
+	Sequence("first_sequence", "SEQUENCE1", ":6;;8<=:<"),
+	Sequence("second_sequence", "SEQUENCE2", "83<??:(61")
+	]
+
+simple_fasta = [ Sequence(x.name, x.sequence, None) for x in simple_fastq ]
+
+
+class TestSequence:
+	@raises(FormatError)
+	def test_too_many_qualities(self):
+		Sequence(name="name", sequence="ACGT", qualities="#####")
+
+	@raises(FormatError)
+	def test_too_many_qualities_colorspace(self):
+		ColorspaceSequence(name="name", sequence="T0123", qualities="#####")
+
+	@raises(FormatError)
+	def test_invalid_primer(self):
+		ColorspaceSequence(name="name", sequence="K0123", qualities="####")
+
+
+class TestFastaReader:
+	def test(self):
+		with FastaReader("tests/data/simple.fasta") as f:
+			reads = list(f)
+		assert reads == simple_fasta
+
+		fasta = StringIO(">first_sequence\nSEQUENCE1\n>second_sequence\nSEQUENCE2\n")
+		reads = list(FastaReader(fasta))
+		assert reads == simple_fasta
+
+	def test_with_comments(self):
+		fasta = StringIO(dedent(
+			"""
+			# a comment
+			# another one
+			>first_sequence
+			SEQUENCE1
+			>second_sequence
+			SEQUENCE2
+			"""))
+		reads = list(FastaReader(fasta))
+		assert reads == simple_fasta
+
+	@raises(FormatError)
+	def test_wrong_format(self):
+		fasta = StringIO(dedent(
+			"""
+			# a comment
+			# another one
+			unexpected
+			>first_sequence
+			SEQUENCE1
+			>second_sequence
+			SEQUENCE2
+			"""))
+		reads = list(FastaReader(fasta))
+
+	def test_fastareader_keeplinebreaks(self):
+		with FastaReader("tests/data/simple.fasta", keep_linebreaks=True) as f:
+			reads = list(f)
+		assert reads[0] == simple_fasta[0]
+		assert reads[1].sequence == 'SEQUEN\nCE2'
+
+	def test_context_manager(self):
+		filename = "tests/data/simple.fasta"
+		with open(filename) as f:
+			assert not f.closed
+			reads = list(openseq(f))
+			assert not f.closed
+		assert f.closed
+
+		with FastaReader(filename) as sr:
+			tmp_sr = sr
+			assert not sr._file.closed
+			reads = list(sr)
+			assert not sr._file.closed
+		assert tmp_sr._file is None
+		# Open it a second time
+		with FastaReader(filename) as sr:
+			pass
+
+
+class TestFastqReader:
+	def test_fastqreader(self):
+		with FastqReader("tests/data/simple.fastq") as f:
+			reads = list(f)
+		assert reads == simple_fastq
+
+	def test_fastqreader_dos(self):
+		with FastqReader("tests/data/dos.fastq") as f:
+			dos_reads = list(f)
+		with FastqReader("tests/data/small.fastq") as f:
+			unix_reads = list(f)
+		assert dos_reads == unix_reads
+
+	@raises(FormatError)
+	def test_fastq_wrongformat(self):
+		with FastqReader("tests/data/withplus.fastq") as f:
+			reads = list(f)
+
+	@raises(FormatError)
+	def test_fastq_incomplete(self):
+		fastq = StringIO("@name\nACGT+\n")
+		with FastqReader(fastq) as fq:
+			list(fq)
+
+	def test_context_manager(self):
+		filename = "tests/data/simple.fastq"
+		with open(filename) as f:
+			assert not f.closed
+			reads = list(openseq(f))
+			assert not f.closed
+		assert f.closed
+
+		with FastqReader(filename) as sr:
+			tmp_sr = sr
+			assert not sr._file.closed
+			reads = list(sr)
+			assert not sr._file.closed
+		assert tmp_sr._file is None
+
+
+class TestFastaQualReader:
+	@raises(FormatError)
+	def test_mismatching_read_names(self):
+		fasta = StringIO(">name\nACG")
+		qual = StringIO(">nome\n3 5 7")
+		list(FastaQualReader(fasta, qual))
+
+	@raises(FormatError)
+	def test_invalid_quality_value(self):
+		fasta = StringIO(">name\nACG")
+		qual = StringIO(">name\n3 xx 7")
+		list(FastaQualReader(fasta, qual))
+
+
+class TestSeqioOpen:
+	def setup(self):
+		self._tmpdir = mkdtemp()
+
+	def teardown(self):
+		shutil.rmtree(self._tmpdir)
+
+	def test_sequence_reader(self):
+		# test the autodetection
+		with openseq("tests/data/simple.fastq") as f:
+			reads = list(f)
+		assert reads == simple_fastq
+
+		with openseq("tests/data/simple.fasta") as f:
+			reads = list(f)
+		assert reads == simple_fasta
+
+		with open("tests/data/simple.fastq") as f:
+			reads = list(openseq(f))
+		assert reads == simple_fastq
+
+		# make the name attribute unavailable
+		f = StringIO(open("tests/data/simple.fastq").read())
+		reads = list(openseq(f))
+		assert reads == simple_fastq
+
+		f = StringIO(open("tests/data/simple.fasta").read())
+		reads = list(openseq(f))
+		assert reads == simple_fasta
+
+	def test_autodetect_fasta_format(self):
+		path = os.path.join(self._tmpdir, 'tmp.fasta')
+		with openseq(path, mode='w') as f:
+			assert isinstance(f, FastaWriter)
+			for seq in simple_fastq:
+				f.write(seq)
+		assert list(openseq(path)) == simple_fasta
+
+	def test_write_qualities_to_fasta(self):
+		path = os.path.join(self._tmpdir, 'tmp.fasta')
+		with openseq(path, mode='w', qualities=True) as f:
+			assert isinstance(f, FastaWriter)
+			for seq in simple_fastq:
+				f.write(seq)
+		assert list(openseq(path)) == simple_fasta
+
+	def test_autodetect_fastq_format(self):
+		path = os.path.join(self._tmpdir, 'tmp.fastq')
+		with openseq(path, mode='w') as f:
+			assert isinstance(f, FastqWriter)
+			for seq in simple_fastq:
+				f.write(seq)
+		assert list(openseq(path)) == simple_fastq
+
+	@raises(ValueError)
+	def test_fastq_qualities_missing(self):
+		path = os.path.join(self._tmpdir, 'tmp.fastq')
+		openseq(path, mode='w', qualities=False)
+
+
+class TestInterleavedReader:
+	def test(self):
+		expected = [
+			(Sequence('read1/1 some text', 'TTATTTGTCTCCAGC', '##HHHHHHHHHHHHH'),
+			Sequence('read1/2 other text', 'GCTGGAGACAAATAA', 'HHHHHHHHHHHHHHH')),
+			(Sequence('read3/1', 'CCAACTTGATATTAATAACA', 'HHHHHHHHHHHHHHHHHHHH'),
+			Sequence('read3/2', 'TGTTATTAATATCAAGTTGG', '#HHHHHHHHHHHHHHHHHHH'))
+		]
+		reads = list(InterleavedSequenceReader("tests/cut/interleaved.fastq"))
+		for (r1, r2), (e1, e2) in zip(reads, expected):
+			print(r1, r2, e1, e2)
+
+		assert reads == expected
+		with openseq("tests/cut/interleaved.fastq", interleaved=True) as f:
+			reads = list(f)
+		assert reads == expected
+
+	@raises(FormatError)
+	def test_missing_partner(self):
+		s = StringIO('@r1\nACG\n+\nHHH')
+		list(InterleavedSequenceReader(s))
+
+	@raises(FormatError)
+	def test_incorrectly_paired(self):
+		s = StringIO('@r1/1\nACG\n+\nHHH\n@wrong_name\nTTT\n+\nHHH')
+		list(InterleavedSequenceReader(s))
+
+
+class TestFastaWriter:
+	def setup(self):
+		self._tmpdir = mkdtemp()
+		self.path = os.path.join(self._tmpdir, 'tmp.fasta')
+
+	def teardown(self):
+		shutil.rmtree(self._tmpdir)
+
+	def test(self):
+		with FastaWriter(self.path) as fw:
+			fw.write("name", "CCATA")
+			fw.write("name2", "HELLO")
+		assert fw._file.closed
+		with open(self.path) as t:
+			assert t.read() == '>name\nCCATA\n>name2\nHELLO\n'
+
+	def test_linelength(self):
+		with FastaWriter(self.path, line_length=3) as fw:
+			fw.write("r1", "ACG")
+			fw.write("r2", "CCAT")
+			fw.write("r3", "TACCAG")
+		assert fw._file.closed
+		with open(self.path) as t:
+			d = t.read()
+			assert d == '>r1\nACG\n>r2\nCCA\nT\n>r3\nTAC\nCAG\n'
+
+	def test_write_sequence_object(self):
+		with FastaWriter(self.path) as fw:
+			fw.write(Sequence("name", "CCATA"))
+			fw.write(Sequence("name2", "HELLO"))
+		assert fw._file.closed
+		with open(self.path) as t:
+			assert t.read() == '>name\nCCATA\n>name2\nHELLO\n'
+
+	def test_write_to_file_like_object(self):
+		sio = StringIO()
+		with FastaWriter(sio) as fw:
+			fw.write(Sequence("name", "CCATA"))
+			fw.write(Sequence("name2", "HELLO"))
+			assert sio.getvalue() == '>name\nCCATA\n>name2\nHELLO\n'
+		assert not fw._file.closed
+
+	def test_write_zero_length_sequence(self):
+		sio = StringIO()
+		with FastaWriter(sio) as fw:
+			fw.write(Sequence("name", ""))
+			assert sio.getvalue() == '>name\n\n', '{0!r}'.format(sio.getvalue())
+
+
+class TestFastqWriter:
+	def setup(self):
+		self._tmpdir = mkdtemp()
+		self.path = os.path.join(self._tmpdir, 'tmp.fastq')
+
+	def teardown(self):
+		shutil.rmtree(self._tmpdir)
+
+	def test(self):
+		with FastqWriter(self.path) as fq:
+			fq.writeseq("name", "CCATA", "!#!#!")
+			fq.writeseq("name2", "HELLO", "&&&!&&")
+		assert fq._file.closed
+		with open(self.path) as t:
+			assert t.read() == '@name\nCCATA\n+\n!#!#!\n@name2\nHELLO\n+\n&&&!&&\n'
+
+	def test_twoheaders(self):
+		with FastqWriter(self.path) as fq:
+			fq.write(Sequence("name", "CCATA", "!#!#!", name2="name"))
+			fq.write(Sequence("name2", "HELLO", "&&&!&", name2="name2"))
+		assert fq._file.closed
+		with open(self.path) as t:
+			assert t.read() == '@name\nCCATA\n+name\n!#!#!\n@name2\nHELLO\n+name2\n&&&!&\n'
+
+	def test_write_to_file_like_object(self):
+		sio = StringIO()
+		with FastqWriter(sio) as fq:
+			fq.writeseq("name", "CCATA", "!#!#!")
+			fq.writeseq("name2", "HELLO", "&&&!&&")
+		assert sio.getvalue() == '@name\nCCATA\n+\n!#!#!\n@name2\nHELLO\n+\n&&&!&&\n'
+
+
+class TestInterleavedWriter:
+	def test(self):
+		reads = [
+			(Sequence('A/1 comment', 'TTA', '##H'),
+			Sequence('A/2 comment', 'GCT', 'HH#')),
+			(Sequence('B/1', 'CC', 'HH'),
+			Sequence('B/2', 'TG', '#H'))
+		]
+		sio = StringIO()
+		with InterleavedSequenceWriter(sio) as writer:
+			for read1, read2 in reads:
+				writer.write(read1, read2)
+		assert sio.getvalue() == '@A/1 comment\nTTA\n+\n##H\n@A/2 comment\nGCT\n+\nHH#\n@B/1\nCC\n+\nHH\n@B/2\nTG\n+\n#H\n'
+
+
+class TestPairedSequenceReader:
+	def test_sequence_names_match(self):
+		def match(name1, name2):
+			seq1 = Sequence(name1, 'ACGT')
+			seq2 = Sequence(name2, 'AACC')
+			return sequence_names_match(seq1, seq2)
+
+		assert match('abc', 'abc')
+		assert match('abc/1', 'abc/2')
+		assert match('abc.1', 'abc.2')
+		assert match('abc1', 'abc2')
+		assert not match('abc', 'xyz')
+
diff --git a/tests/testtrim.py b/tests/testtrim.py
new file mode 100644
index 0000000..09c3102
--- /dev/null
+++ b/tests/testtrim.py
@@ -0,0 +1,27 @@
+# coding: utf-8
+from __future__ import print_function, division, absolute_import
+
+from cutadapt.seqio import ColorspaceSequence, Sequence
+from cutadapt.adapters import Adapter, ColorspaceAdapter, PREFIX, BACK
+from cutadapt.scripts.cutadapt import AdapterCutter
+
+def test_cs_5p():
+	read = ColorspaceSequence("name", "0123", "DEFG", "T")
+	adapter = ColorspaceAdapter("CG", PREFIX, 0.1)
+	cutter = AdapterCutter([adapter])
+	trimmed_read = cutter(read)
+	# no assertion here, just make sure the above code runs without
+	# an exception
+
+
+def test_statistics():
+	read = Sequence('name', 'AAAACCCCAAAA')
+	adapters = [Adapter('CCCC', BACK, 0.1)]
+	cutter = AdapterCutter(adapters, times=3)
+	trimmed_read = cutter(read)
+	# TODO make this a lot simpler
+	trimmed_bp = 0
+	for adapter in adapters:
+		for d in (adapter.lengths_front, adapter.lengths_back):
+			trimmed_bp += sum(seqlen * count for (seqlen, count) in d.items())
+	assert trimmed_bp <= len(read), trimmed_bp
diff --git a/tests/testxopen.py b/tests/testxopen.py
new file mode 100644
index 0000000..2d714c4
--- /dev/null
+++ b/tests/testxopen.py
@@ -0,0 +1,101 @@
+# coding: utf-8
+from __future__ import print_function, division, absolute_import
+import gzip
+import os
+import random
+import sys
+from nose.tools import raises
+from cutadapt.xopen import xopen, lzma
+from .utils import temporary_path
+
+base = "tests/data/small.fastq"
+files = [ base + ext for ext in ['', '.gz', '.bz2' ] ]
+if lzma is not None:
+	files.append(base + '.xz')
+
+def test_context_manager():
+	major, minor = sys.version_info[0:2]
+	for name in files:
+		if major == 2 and minor == 6:
+			continue  # Py26 compression libraries do not support context manager protocol.
+		with xopen(name, 'rt') as f:
+			lines = list(f)
+			assert len(lines) == 12
+			assert lines[5] == 'AGCCGCTANGACGGGTTGGCCCTTAGACGTATCT\n', name
+			f.close()
+
+def test_append():
+	for ext in ["", ".gz"]:  # BZ2 does NOT support append
+		text = "AB"
+		if ext != "":
+			text = text.encode("utf-8")  # On Py3, need to send BYTES, not unicode
+		reference = text + text
+		print("Trying ext=%s" % ext)
+		with temporary_path('truncated.fastq' + ext) as path:
+			try:
+				os.unlink(path)
+			except OSError:
+				pass
+			with xopen(path, 'a') as f:
+				f.write(text)
+			with xopen(path, 'a') as f:
+				f.write(text)
+			with xopen(path, 'r') as f:
+				for appended in f:
+					pass
+				try:
+					reference = reference.decode("utf-8")
+				except AttributeError:
+					pass
+				print(appended)
+				print(reference)
+				assert appended == reference
+
+def test_xopen_text():
+	for name in files:
+		f = xopen(name, 'rt')
+		lines = list(f)
+		assert len(lines) == 12
+		assert lines[5] == 'AGCCGCTANGACGGGTTGGCCCTTAGACGTATCT\n', name
+		f.close()
+
+
+def test_xopen_binary():
+	for name in files:
+		f = xopen(name, 'rb')
+		lines = list(f)
+		assert len(lines) == 12
+		assert lines[5] == b'AGCCGCTANGACGGGTTGGCCCTTAGACGTATCT\n', name
+		f.close()
+
+
+def create_truncated_file(path):
+	# Random text
+	text = ''.join(random.choice('ABCDEFGHIJKLMNOPQRSTUVWXYZ') for _ in range(200))
+	f = xopen(path, 'w')
+	f.write(text)
+	f.close()
+	f = open(path, 'a')
+	f.truncate(os.stat(path).st_size - 10)
+	f.close()
+
+
+# Disable these tests in Python 3.2 and 3.3
+if not ((3, 2) <= sys.version_info[:2] <= (3, 3)):
+	@raises(EOFError)
+	def test_truncated_gz():
+		with temporary_path('truncated.gz') as path:
+			create_truncated_file(path)
+			f = xopen(path, 'r')
+			f.read()
+			f.close()
+
+
+	@raises(EOFError)
+	def test_truncated_gz_iter():
+		with temporary_path('truncated.gz') as path:
+			create_truncated_file(path)
+			f = xopen(path, 'r')
+			for line in f:
+				pass
+			f.close()
diff --git a/tests/utils.py b/tests/utils.py
new file mode 100644
index 0000000..473e598
--- /dev/null
+++ b/tests/utils.py
@@ -0,0 +1,50 @@
+# coding: utf-8
+from __future__ import print_function, division, absolute_import
+
+import sys, os
+from contextlib import contextmanager
+from cutadapt.scripts import cutadapt
+
+@contextmanager
+def redirect_stderr():
+	"Send stderr to stdout. Nose doesn't capture stderr, yet."
+	old_stderr = sys.stderr
+	sys.stderr = sys.stdout
+	yield
+	sys.stderr = old_stderr
+
+
+@contextmanager
+def temporary_path(name):
+	directory = os.path.join(os.path.dirname(__file__), 'testtmp')
+	if not os.path.isdir(directory):
+		os.mkdir(directory)
+	path = os.path.join(directory, name)
+	yield path
+	os.remove(path)
+
+
+def datapath(path):
+	return os.path.join(os.path.dirname(__file__), 'data', path)
+
+
+def cutpath(path):
+	return os.path.join(os.path.dirname(__file__), 'cut', path)
+
+
+def files_equal(path1, path2):
+	return os.system("diff -u {0} {1}".format(path1, path2)) == 0
+
+
+def run(params, expected, inpath, inpath2=None):
+	if type(params) is str:
+		params = params.split()
+	with temporary_path(expected) as tmp_fastaq:
+		params += ['-o', tmp_fastaq ] # TODO not parallelizable
+		params += [ datapath(inpath) ]
+		if inpath2:
+			params += [ datapath(inpath2) ]
+		assert cutadapt.main(params) is None
+		# TODO redirect standard output
+		assert files_equal(cutpath(expected), tmp_fastaq)
+	# TODO diff log files
diff --git a/tox.ini b/tox.ini
new file mode 100644
index 0000000..43c4de1
--- /dev/null
+++ b/tox.ini
@@ -0,0 +1,6 @@
+[tox]
+envlist = py26,py27,py33,py34,py35
+
+[testenv]
+deps = nose
+commands = nosetests -P tests

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/cutadapt.git



More information about the debian-med-commit mailing list