[med-svn] [ariba] 01/02: New upstream version 2.3.0+ds

Sascha Steinbiss satta at debian.org
Wed Oct 12 16:10:35 UTC 2016


This is an automated email from the git hooks/post-receive script.

satta pushed a commit to branch master
in repository ariba.

commit 4c3626816798ca60cc56e5726399173a3f152d11
Author: Sascha Steinbiss <satta at debian.org>
Date:   Wed Oct 12 16:00:03 2016 +0000

    New upstream version 2.3.0+ds
---
 Dockerfile                                         |  19 +
 README.md                                          |  38 +-
 ariba/assembly.py                                  |  18 +-
 ariba/cluster.py                                   |  24 +-
 ariba/clusters.py                                  |  15 +-
 ariba/ext/minimap_ariba.cpp                        |  51 +-
 ariba/external_progs.py                            |   6 +-
 ariba/mash.py                                      |  15 +-
 ariba/ref_genes_getter.py                          |  43 +-
 ariba/ref_preparer.py                              |  13 +-
 ariba/reference_data.py                            |  18 +-
 ariba/report.py                                    | 188 ++++---
 ariba/samtools_variants.py                         |   5 +-
 ariba/summary.py                                   |  20 +-
 ariba/summary_cluster.py                           | 101 ++--
 ariba/summary_cluster_variant.py                   |  67 ++-
 ariba/tasks/getref.py                              |   3 +-
 ariba/tasks/prepareref.py                          |   1 +
 ariba/tasks/run.py                                 |   4 +
 ariba/test_run_data/metadata.tsv                   |   1 +
 ariba/test_run_data/reads_1.fq                     |   4 +
 ariba/test_run_data/reads_2.fq                     |   4 +
 .../test_run_data/ref_fasta_to_make_reads_from.fa  |   5 +
 ariba/test_run_data/ref_seqs.fa                    |   5 +
 ariba/tests/assembly_test.py                       |  14 +-
 ariba/tests/cluster_test.py                        | 133 +++--
 ariba/tests/clusters_test.py                       |  11 +
 .../data/cluster_test_full_run_delete_codon.fa     |   3 +
 .../data/cluster_test_full_run_delete_codon.tsv    |   1 +
 .../for_reads.fa                                   |  20 +
 .../cluster_test_full_run_delete_codon/reads_1.fq  | 588 ++++++++++++++++++++
 .../cluster_test_full_run_delete_codon/reads_2.fq  | 588 ++++++++++++++++++++
 .../references.fa                                  |   3 +
 .../data/cluster_test_full_run_insert_codon.fa     |   3 +
 .../data/cluster_test_full_run_insert_codon.tsv    |   1 +
 .../for_reads.fa                                   |  20 +
 .../cluster_test_full_run_insert_codon/reads_1.fq  | 592 +++++++++++++++++++++
 .../cluster_test_full_run_insert_codon/reads_2.fq  | 592 +++++++++++++++++++++
 .../references.fa                                  |   3 +
 .../data/cluster_test_full_run_multiple_vars.fa    |   5 +
 .../data/cluster_test_full_run_multiple_vars.tsv   |   2 +
 .../for_reads.fa                                   |  20 +
 .../cluster_test_full_run_multiple_vars/reads_1.fq | 584 ++++++++++++++++++++
 .../cluster_test_full_run_multiple_vars/reads_2.fq | 584 ++++++++++++++++++++
 .../references.fa                                  |   5 +
 .../data/cluster_test_full_run_ok_non_coding.fa    |   2 +-
 ...cluster_test_full_run_ref_not_in_cluster.in.fa} |   2 +-
 ...cluster_test_full_run_ref_not_in_cluster.in.tsv |   6 +
 ...uster_test_full_run_ref_not_in_cluster.mash.fa} |   7 +-
 ...er_test_full_run_ref_not_in_cluster.mash.fa.msh | Bin 0 -> 2760 bytes
 .../reads_1.fq                                     | 144 +++++
 .../reads_2.fq                                     | 144 +++++
 .../references.fa}                                 |   2 +-
 ...sters_minimap_reads_to_all_refs.out.clstr_count |   4 +-
 .../clusters_minimap_reads_to_all_refs.out.hist    |  37 +-
 ariba/tests/data/clusters_test_dummy_db.fa.msh     | Bin 0 -> 504 bytes
 ariba/tests/data/clusters_test_write_report.tsv    |   2 +-
 ariba/tests/data/reference_data_init_ok.rename.tsv |   2 +
 .../tests/data/reference_data_load_rename_file.tsv |   2 +
 ariba/tests/data/report_filter_test_init_bad.tsv   |   8 +-
 ariba/tests/data/report_filter_test_init_good.tsv  |  10 +-
 .../data/report_filter_test_load_report_bad.tsv    |   8 +-
 .../data/report_filter_test_load_report_good.tsv   |  10 +-
 .../tests/data/report_filter_test_run.expected.tsv |  12 +-
 ariba/tests/data/report_filter_test_run.in.tsv     |  18 +-
 .../tests/data/report_filter_test_write_report.tsv |   8 +-
 .../summary_gather_unfiltered_output_data.in.1.tsv |  12 +-
 .../summary_gather_unfiltered_output_data.in.2.tsv |  12 +-
 ...ample_test_column_names_tuples_and_het_snps.tsv |  16 +-
 .../summary_sample_test_column_summary_data.tsv    |  16 +-
 .../data/summary_sample_test_load_file.in.tsv      |  14 +-
 .../summary_sample_test_non_synon_variants.tsv     |  12 +-
 .../tests/data/summary_sample_test_var_groups.tsv  |  14 +-
 .../tests/data/summary_test_load_input_files.1.tsv |   6 +-
 .../tests/data/summary_test_load_input_files.2.tsv |  10 +-
 ariba/tests/data/summary_test_whole_run.in.1.tsv   |  34 +-
 ariba/tests/data/summary_test_whole_run.in.2.tsv   |  15 +-
 ariba/tests/data/summary_test_whole_run.out.csv    |   6 +-
 ariba/tests/data/summary_to_matrix.1.tsv           |  10 +-
 ariba/tests/data/summary_to_matrix.2.tsv           |  12 +-
 ariba/tests/read_filter_test.py                    |   8 +-
 ariba/tests/ref_preparer_test.py                   |   1 +
 ariba/tests/reference_data_test.py                 |  17 +
 ariba/tests/report_filter_test.py                  |  58 +-
 ariba/tests/samtools_variants_test.py              |  22 +-
 ariba/tests/summary_cluster_test.py                | 248 +++++----
 ariba/tests/summary_cluster_variant_test.py        |  73 ++-
 ariba/tests/summary_test.py                        |  56 +-
 scripts/ariba                                      |  61 ++-
 setup.py                                           |   2 +-
 90 files changed, 4981 insertions(+), 622 deletions(-)

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..82d13c8
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,19 @@
+#
+# This container will install ARIBA from master
+#
+FROM debian:testing
+
+#
+#  Authorship
+#
+MAINTAINER ap13 at sanger.ac.uk
+
+#
+# Install the dependencies
+#
+RUN apt-get update -qq && apt-get install -y git bowtie2 cd-hit fastaq libc6 libfml0 libgcc1 libminimap0 libstdc++6 mash mummer python3 python3-setuptools python3-dev python3-pysam python3-pymummer python3-dendropy gcc g++ zlib1g-dev 
+
+#
+# Get the latest code from github and install
+#
+RUN git clone https://github.com/sanger-pathogens/ariba.git && cd ariba && python3 setup.py install
diff --git a/README.md b/README.md
index 9b8dd54..bc52e03 100644
--- a/README.md
+++ b/README.md
@@ -45,6 +45,21 @@ If the tests all pass, install:
 
     python3 setup.py install
 
+### Docker
+ARIBA can be run in a Docker container. First install Docker, then install ARIBA by running:
+
+    docker pull sangerpathogens/ariba
+
+To use ARIBA you would use a command such as this (substituting in your directories), where your files are assumed to be stored in /home/ubuntu/data:
+
+    docker run --rm -it -v /home/ubuntu/data:/data sangerpathogens/ariba ariba -h
+
+
+### Debian (testing)
+ARIBA is available in the latest version of Debian, and over time will progressively filter through to Ubuntu and other distributions which use Debian. To install it as root:
+
+    sudo apt-get install ariba
+
 
 ### Dependencies and environment variables
 
@@ -59,11 +74,10 @@ to the following dependencies.
 |----------------|------------------------|---------------------------|
 | Bowtie2        | `bowtie2`              | `$ARIBA_BOWTIE2`          |
 | CD-HIT (est)   | `cd-hit-est`           | `$ARIBA_CDHIT`            |
-| CD-HIT (est-2d)| `cd-hit-est-2d`        | `$ARIBA_CDHIT2D`          |
 | MASH           | `mash`                 | `$ARIBA_MASH`             |
 
 
-For example, you could specify an exact version of a Samtools executable
+For example, you could specify an exact version of a bowtie2 executable
 that you compiled and downloaded in your home directory (assuming BASH):
 
     export ARIBA_BOWTIE2=$HOME/bowtie2-2.1.0/bowtie2
@@ -82,16 +96,18 @@ are put in a temporary directory made by ARIBA.  The total size of these
 files is small, but there can be a many of them. This can be a
 problem when running large numbers (100s or 1000s) of jobs simultaneously
 on the same file system.
-By default, ARIBA creates a temporary directory for these files
-inside the output directory of each run.
+The parent directory of the temporary directory is determined in the
+following order of precedence:
+
+1. The value of the option `--tmp_dir` (if that option was used)
+2. The environment variable `$ARIBA_TMPDIR` (if it is set)
+3. The environment variable `$TMPDIR` (if it is set)
+4. If none of the above is found, then use the run's output directory.
 
 Each temporary directory
 is unique to one run of ARIBA, and is automatically deleted at the end
 of the run (even if ARIBA was killed by the user or crashed).
-The parent directory of the temporary
-directory can be changed using the environment variable
-`$ARIBA_TMPDIR`. The temporary directory for each run will be made
-inside `$ARIBA_TMPDIR`. For example,
+For example,
 
     export $ARIBA_TMPDIR=/tmp
 
@@ -103,12 +119,6 @@ will have a name of the form
 where the suffix `abcdef` is a random string of characters, chosen
 such that `/tmp/ariba.tmp.abcdef` does not already exist.
 
-The temporary directory can also be changed using the option
-`--tmp_dir` when running `ariba run`. Using this option takes precedence
-over the environment variable `$ARIBA_TMPDIR`. If neither are
-set, then ARIBA creates the temporary directory inside
-the output directory given to `ariba run`.
-
 The exception to the above is if the option `--noclean` is used.
 This forces the temporary directory to be placed in the output
 directory, and temporary files are kept. It is intended for
diff --git a/ariba/assembly.py b/ariba/assembly.py
index 88fd1c3..87c2273 100644
--- a/ariba/assembly.py
+++ b/ariba/assembly.py
@@ -18,6 +18,7 @@ class Assembly:
       final_assembly_fa,
       final_assembly_bam,
       log_fh,
+      mash_reference_fasta,
       scaff_name_prefix='scaffold',
       kmer=0,
       assembler='fermilite',
@@ -42,6 +43,7 @@ class Assembly:
         self.final_assembly_fa = os.path.abspath(final_assembly_fa)
         self.final_assembly_bam = os.path.abspath(final_assembly_bam)
         self.log_fh = log_fh
+        self.mash_reference_fasta = os.path.abspath(mash_reference_fasta)
         self.scaff_name_prefix = scaff_name_prefix
 
         self.ref_seq_name = None
@@ -377,14 +379,26 @@ class Assembly:
                 self.log_fh = None
                 return
 
-            masher = mash.Masher(self.ref_fastas, self.gapfilled_length_filtered, self.log_fh, self.extern_progs)
+            masher = mash.Masher(self.mash_reference_fasta, self.gapfilled_length_filtered, self.log_fh, self.extern_progs)
             self.ref_seq_name = masher.run(self.mash_dist_file)
             if self.ref_seq_name is None:
                 print('Could not determine closest reference sequence', file=self.log_fh)
                 self.log_fh = None
                 return
 
-            faidx.write_fa_subset({self.ref_seq_name}, self.ref_fastas, self.ref_fasta)
+            file_reader = pyfastaq.sequences.file_reader(self.ref_fastas)
+            for ref_seq in file_reader:
+                if self.ref_seq_name == ref_seq.id:
+                    f_out = pyfastaq.utils.open_file_write(self.ref_fasta)
+                    print(ref_seq, file=f_out)
+                    pyfastaq.utils.close(f_out)
+                    break
+            else:
+                print('Closest reference sequence ', self.ref_seq_name, ' does not belong to this cluster', file=self.log_fh)
+                self.ref_seq_name = None
+                self.log_fh = None
+                return
+
             print('Closest reference sequence according to mash: ', self.ref_seq_name, file=self.log_fh)
 
             contigs_both_strands = self._fix_contig_orientation(self.gapfilled_length_filtered, self.ref_fasta, self.final_assembly_fa, min_id=self.nucmer_min_id, min_length=self.nucmer_min_len, breaklen=self.nucmer_breaklen)
diff --git a/ariba/cluster.py b/ariba/cluster.py
index e5a73b0..efdc2cc 100644
--- a/ariba/cluster.py
+++ b/ariba/cluster.py
@@ -1,4 +1,5 @@
 import signal
+import traceback
 import os
 import atexit
 import random
@@ -6,7 +7,7 @@ import math
 import shutil
 import sys
 import pyfastaq
-from ariba import assembly, assembly_compare, assembly_variants, external_progs, flag, mapping, read_filter, report, samtools_variants
+from ariba import assembly, assembly_compare, assembly_variants, external_progs, flag, mapping, mash, report, samtools_variants
 
 class Error (Exception): pass
 
@@ -17,6 +18,7 @@ class Cluster:
       root_dir,
       name,
       refdata,
+      refdata_seqs_fasta_for_mash=None,
       total_reads=None,
       total_reads_bases=None,
       fail_file=None,
@@ -126,6 +128,13 @@ class Cluster:
         else:
             self.extern_progs = extern_progs
 
+        if refdata_seqs_fasta_for_mash is None:
+            mash.Masher.sketch(self.references_fa, True, self.extern_progs, verbose=False)
+            self.refdata_seqs_fasta_for_mash = self.references_fa
+        else:
+            self.refdata_seqs_fasta_for_mash = os.path.abspath(refdata_seqs_fasta_for_mash)
+            assert os.path.exists(self.refdata_seqs_fasta_for_mash + '.msh')
+
         self.random_seed = random_seed
         wanted_signals = [signal.SIGABRT, signal.SIGINT, signal.SIGSEGV, signal.SIGTERM]
         for s in wanted_signals:
@@ -174,9 +183,7 @@ class Cluster:
 
             self.refdata.write_seqs_to_fasta(self.references_fa, self.reference_names)
             self.log_fh = pyfastaq.utils.open_file_write(self.logfile)
-            self.read_store.get_reads(self.name, self.all_reads1, self.all_reads2)
-            rfilter = read_filter.ReadFilter(self.read_store, self.references_fa, self.name, self.log_fh, self.extern_progs)
-            self.total_reads, self.total_reads_bases = rfilter.run(self.all_reads1, self.all_reads2)
+            self.total_reads, self.total_reads_bases = self.read_store.get_reads(self.name, self.all_reads1, self.all_reads2, log_fh=self.log_fh)
             self.refdata.write_seqs_to_fasta(self.references_fa, self.reference_names)
 
         self.longest_ref_length = max([len(self.refdata.sequence(name)) for name in self.reference_names])
@@ -314,6 +321,7 @@ class Cluster:
               self.final_assembly_fa,
               self.final_assembly_bam,
               self.log_fh,
+              self.refdata_seqs_fasta_for_mash,
               scaff_name_prefix=self.name,
               kmer=self.assembly_kmer,
               assembler=self.assembler,
@@ -420,8 +428,12 @@ class Cluster:
             print('\nCould not get closest reference sequence\n', file=self.log_fh, flush=True)
             self.status_flag.add('ref_seq_choose_fail')
 
+        try:
+            self.report_lines = report.report_lines(self)
+        except:
+            print('Error making report for cluster ', self.name, '... traceback:', file=sys.stderr)
+            traceback.print_exc(file=sys.stderr)
+            raise Error('Error making report for cluster ' + self.name)
 
-        print('\nMaking report lines', file=self.log_fh, flush=True)
-        self.report_lines = report.report_lines(self)
         self._clean()
         atexit.unregister(self._atexit)
diff --git a/ariba/clusters.py b/ariba/clusters.py
index 1052f6f..f2eac5a 100644
--- a/ariba/clusters.py
+++ b/ariba/clusters.py
@@ -76,6 +76,9 @@ class Clusters:
         self.extern_progs = extern_progs
         self.clusters_tsv = os.path.abspath(os.path.join(refdata_dir, '02.cdhit.clusters.tsv'))
         self.all_ref_seqs_fasta = os.path.abspath(os.path.join(refdata_dir, '02.cdhit.all.fa'))
+        mash_file = self.all_ref_seqs_fasta + '.msh'
+        if not os.path.exists(mash_file):
+            raise Error('Error! Mash file ' + mash_file + ' not found.\nThe likely cause is that prepareref was run using an old version of ariba.\nIf this is the case, please rerun ariba preparef.')
 
         if version_report_lines is None:
             self.version_report_lines = []
@@ -95,8 +98,7 @@ class Clusters:
         self.cdhit_cluster_representatives_fa = self.cdhit_files_prefix + '.cluster_representatives.fa'
         self.bam_prefix = os.path.join(self.outdir, 'map_reads_to_cluster_reps')
         self.bam = self.bam_prefix + '.bam'
-        self.report_file_all_tsv = os.path.join(self.outdir, 'report.all.tsv')
-        self.report_file_all_xls = os.path.join(self.outdir, 'report.all.xls')
+        self.report_file_all_tsv = os.path.join(self.outdir, 'debug.report.tsv')
         self.report_file_filtered = os.path.join(self.outdir, 'report.tsv')
         self.catted_assembled_seqs_fasta = os.path.join(self.outdir, 'assembled_seqs.fa.gz')
         self.catted_genes_matching_refs_fasta = os.path.join(self.outdir, 'assembled_genes.fa.gz')
@@ -138,6 +140,8 @@ class Clusters:
         if tmp_dir is None:
             if 'ARIBA_TMPDIR' in os.environ:
                 tmp_dir = os.path.abspath(os.environ['ARIBA_TMPDIR'])
+            elif 'TMPDIR' in os.environ:
+                tmp_dir = os.path.abspath(os.environ['TMPDIR'])
             else:
                 tmp_dir = self.outdir
 
@@ -372,6 +376,12 @@ class Clusters:
 
         for cluster_name in sorted(self.cluster_to_dir):
             counter += 1
+
+            if self.cluster_read_counts[cluster_name] <= 2:
+                if self.verbose:
+                    print('Not constructing cluster ', cluster_name, ' because it only has ', self.cluster_read_counts[cluster_name], ' reads (', counter, ' of ', len(self.cluster_to_dir), ')', sep='')
+                continue
+
             if self.verbose:
                 print('Constructing cluster ', cluster_name, ' (', counter, ' of ', len(self.cluster_to_dir), ')', sep='')
             new_dir = self.cluster_to_dir[cluster_name]
@@ -381,6 +391,7 @@ class Clusters:
                 new_dir,
                 cluster_name,
                 self.refdata,
+                refdata_seqs_fasta_for_mash=self.all_ref_seqs_fasta,
                 fail_file=os.path.join(self.fails_dir, cluster_name),
                 read_store=self.read_store,
                 reference_names=self.cluster_ids[cluster_name],
diff --git a/ariba/ext/minimap_ariba.cpp b/ariba/ext/minimap_ariba.cpp
index 612945e..91fd82b 100644
--- a/ariba/ext/minimap_ariba.cpp
+++ b/ariba/ext/minimap_ariba.cpp
@@ -23,6 +23,7 @@ void chooseCluster(std::string outfile, std::map<std::string, uint64_t>& refname
 void writeClusterCountsFile(std::string outfile, const std::map<std::string, uint64_t>& readCounters, const std::map<std::string, uint64_t>& baseCounters);
 void writeInsertHistogramFile(std::string outfile, const std::map<uint32_t, uint32_t>& insertHist);
 void writeProperPairsFile(std::string outfile, uint32_t properPairs);
+bool readMappingOk(const mm_reg1_t* r, const mm_idx_t* mi, const kseq_t *ks1, uint32_t endTolerance);
 
 int run_minimap(char *clustersFileIn, char *refFileIn, char *readsFile1In, char *readsFile2In, char *outprefixIn);
 
@@ -143,18 +144,24 @@ int run_minimap(char *clustersFileIn, char *refFileIn, char *readsFile1In, char
             for (j  =0; j < n_reg1; ++j)
             {
                 const mm_reg1_t *r = &reg1[j];
-                refnames.insert(mi->name[r->rid]);
-                refnameToScore[mi->name[r->rid]] += r->cnt;
-                uint32_t coord = r->rev ? std::max(r->rs, r->re) : std::min(r->rs, r->re);
-                positions1[mi->name[r->rid]].push_back(std::make_pair(coord, r->rev));
+                if (readMappingOk(r, mi, ks1, (int) 1.1 * k))
+                {
+                    refnames.insert(mi->name[r->rid]);
+                    refnameToScore[mi->name[r->rid]] += r->cnt;
+                    uint32_t coord = r->rev ? std::max(r->rs, r->re) : std::min(r->rs, r->re);
+                    positions1[mi->name[r->rid]].push_back(std::make_pair(coord, r->rev));
+                }
             }
             for (j  =0; j < n_reg2; ++j)
             {
                 const mm_reg1_t *r = &reg2[j];
-                refnames.insert(mi->name[r->rid]);
-                refnameToScore[mi->name[r->rid]] += r->cnt;
-                uint32_t coord = r->rev ? std::max(r->rs, r->re) : std::min(r->rs, r->re);
-                positions2[mi->name[r->rid]].push_back(std::make_pair(coord, r->rev));
+                if (readMappingOk(r, mi, ks2, (int) 1.1 * k))
+                {
+                    refnames.insert(mi->name[r->rid]);
+                    refnameToScore[mi->name[r->rid]] += r->cnt;
+                    uint32_t coord = r->rev ? std::max(r->rs, r->re) : std::min(r->rs, r->re);
+                    positions2[mi->name[r->rid]].push_back(std::make_pair(coord, r->rev));
+                }
             }
 
             bool foundProperPair = false;
@@ -349,3 +356,31 @@ void writeProperPairsFile(std::string outfile, uint32_t properPairs)
     ofs << properPairs << '\n';
     ofs.close();
 }
+
+
+bool readMappingOk(const mm_reg1_t* r, const mm_idx_t* mi, const kseq_t *ks, uint32_t endTolerance)
+{
+    // coords are same style as python (0-based, end is one past the end)
+    assert (r->qs < r->qe && r->rs <  r->re);
+
+    if (r->qe - r->qs < std::min((unsigned) 50, (int) 0.5 * ks->seq.l))
+    {
+        return false;
+    }
+
+    uint32_t refLength = mi->len[r->rid];
+    bool startOk;
+    bool endOk;
+    if (r->rev)
+    {
+        startOk = (r->qs < endTolerance || refLength - r->re < endTolerance);
+        endOk = (ks->seq.l - r->qe < endTolerance || r->rs < endTolerance);
+    }
+    else
+    {
+        startOk = (r->qs < endTolerance || r->rs < endTolerance);
+        endOk = (ks->seq.l - r->qe < endTolerance || refLength - r->re < endTolerance);
+    }
+
+    return (startOk && endOk);
+}
diff --git a/ariba/external_progs.py b/ariba/external_progs.py
index 1fe3bc6..622c0e6 100644
--- a/ariba/external_progs.py
+++ b/ariba/external_progs.py
@@ -12,7 +12,7 @@ class Error (Exception): pass
 prog_to_default = {
     'bowtie2': 'bowtie2',
     'cdhit': 'cd-hit-est',
-    'cdhit2d': 'cd-hit-est-2d',
+    #'cdhit2d': 'cd-hit-est-2d',
     #'gapfiller': 'GapFiller.pl',
     'mash': 'mash',
     'nucmer' : 'nucmer',
@@ -27,7 +27,7 @@ prog_to_env_var = {x: 'ARIBA_' + x.upper() for x in prog_to_default if x not in
 prog_to_version_cmd = {
     'bowtie2': ('--version', re.compile('.*bowtie2.*version (.*)$')),
     'cdhit': ('', re.compile('CD-HIT version ([0-9\.]+) \(')),
-    'cdhit2d': ('', re.compile('CD-HIT version ([0-9\.]+) \(')),
+    #'cdhit2d': ('', re.compile('CD-HIT version ([0-9\.]+) \(')),
     #'gapfiller': ('', re.compile('^Usage: .*pl \[GapFiller_(.*)\]')),
     'mash': ('', re.compile('^Mash version (.*)$')),
     'nucmer': ('--version', re.compile('^NUCmer \(NUCleotide MUMmer\) version ([0-9\.]+)')),
@@ -39,7 +39,7 @@ prog_to_version_cmd = {
 min_versions = {
     'bowtie2': '2.1.0',
     'cdhit': '4.6',
-    'cdhit2d': '4.6',
+    #'cdhit2d': '4.6',
     'mash': '1.0.2',
     'nucmer': '3.1',
     #'spades': '3.5.0',
diff --git a/ariba/mash.py b/ariba/mash.py
index 97a9496..b4c3ffb 100644
--- a/ariba/mash.py
+++ b/ariba/mash.py
@@ -19,9 +19,13 @@ class Masher:
             self.extern_progs = extern_progs
 
 
-    def _sketch(self, infile, individual):
+    @classmethod
+    def sketch(cls, infile, individual, extern_progs, verbose=True, verbose_filehandle=None):
+        if verbose:
+            assert verbose_filehandle is not None
+
         cmd_list = [
-            self.extern_progs.exe('mash'),
+            extern_progs.exe('mash'),
             'sketch',
             '-s 100000'
         ]
@@ -30,7 +34,7 @@ class Masher:
             cmd_list.append('-i')
 
         cmd_list.append(infile)
-        common.syscall(' '.join(cmd_list), verbose=True, verbose_filehandle=self.log_fh)
+        common.syscall(' '.join(cmd_list), verbose=verbose, verbose_filehandle=verbose_filehandle)
 
 
     def _dist(self, outfile):
@@ -45,8 +49,9 @@ class Masher:
 
 
     def run(self, outfile):
-        self._sketch(self.reference_fa, True)
-        self._sketch(self.query_fa, False)
+        if not os.path.exists(self.reference_fa + '.msh'):
+            Masher.sketch(self.reference_fa, True, self.extern_progs, verbose=True, verbose_filehandle=self.log_fh)
+        Masher.sketch(self.query_fa, False, self.extern_progs, verbose=True, verbose_filehandle=self.log_fh)
         self._dist(outfile)
         if os.path.getsize(outfile) == 0:
             return None
diff --git a/ariba/ref_genes_getter.py b/ariba/ref_genes_getter.py
index e59efbb..34b4d8c 100644
--- a/ariba/ref_genes_getter.py
+++ b/ariba/ref_genes_getter.py
@@ -25,10 +25,11 @@ argannot_ref = '"ARG-ANNOT, a new bioinformatic tool to discover antibiotic resi
 
 
 class RefGenesGetter:
-    def __init__(self, ref_db, version=None):
+    def __init__(self, ref_db, version=None, debug=False):
         if ref_db not in allowed_ref_dbs:
             raise Error('Error in RefGenesGetter. ref_db must be one of: ' + str(allowed_ref_dbs) + ', but I got "' + ref_db)
         self.ref_db=ref_db
+        self.debug = debug
         self.genetic_code = 11
         self.max_download_attempts = 3
         self.sleep_time = 2
@@ -185,6 +186,9 @@ class RefGenesGetter:
         pyfastaq.utils.close(f_out_tsv)
         pyfastaq.utils.close(f_out_log)
         os.chdir(current_dir)
+        if not self.debug:
+            shutil.rmtree(tmpdir)
+
         print('Extracted data and written ARIBA input files\n')
         print('Finished. Final files are:', final_fasta, final_tsv, sep='\n\t', end='\n\n')
         print('You can use them with ARIBA like this:')
@@ -216,22 +220,36 @@ class RefGenesGetter:
         print('Combining downloaded fasta files...')
         fout_fa = pyfastaq.utils.open_file_write(final_fasta)
         fout_tsv = pyfastaq.utils.open_file_write(final_tsv)
+        used_names = {}
 
-        for filename in os.listdir('database'):
+        for filename in os.listdir():
             if filename.endswith('.fsa'):
                 print('   ', filename)
-                prefix = filename.split('.')[0]
-                file_reader = pyfastaq.sequences.file_reader(os.path.join('database', filename))
+                file_reader = pyfastaq.sequences.file_reader(filename)
                 for seq in file_reader:
-                    seq.id = prefix + '.' + seq.id
+                    try:
+                        prefix, suffix = seq.id.split('_', maxsplit=1)
+                        description = 'Original name: ' + seq.id
+                        seq.id = prefix + '.' + suffix
+                    except:
+                        description = '.'
+
+                    # names are not unique across the files
+                    if seq.id in used_names:
+                        used_names[seq.id] += 1
+                        seq.id += '_' + str(used_names[seq.id])
+                    else:
+                        used_names[seq.id] = 1
+
                     print(seq, file=fout_fa)
-                    print(seq.id, '1', '0', '.', '.', '.', sep='\t', file=fout_tsv)
+                    print(seq.id, '1', '0', '.', '.', description, sep='\t', file=fout_tsv)
 
         pyfastaq.utils.close(fout_fa)
         pyfastaq.utils.close(fout_tsv)
         print('\nFinished combining files\n')
         os.chdir(current_dir)
-        shutil.rmtree(tmpdir)
+        if not self.debug:
+            shutil.rmtree(tmpdir)
         print('Finished. Final files are:', final_fasta, final_tsv, sep='\n\t', end='\n\n')
         print('You can use them with ARIBA like this:')
         print('ariba prepareref -f', final_fasta, '-m', final_tsv, 'output_directory\n')
@@ -273,7 +291,8 @@ class RefGenesGetter:
 
         pyfastaq.utils.close(f_out_tsv)
         pyfastaq.utils.close(f_out_fa)
-        shutil.rmtree(tmpdir)
+        if not self.debug:
+            shutil.rmtree(tmpdir)
 
         print('Finished. Final files are:', final_fasta, final_tsv, sep='\n\t', end='\n\n')
         print('You can use them with ARIBA like this:')
@@ -326,7 +345,8 @@ class RefGenesGetter:
         pyfastaq.utils.close(fout_tsv)
         print('\nFinished combining files\n')
         os.chdir(current_dir)
-        shutil.rmtree(tmpdir)
+        if not self.debug:
+            shutil.rmtree(tmpdir)
         print('Finished. Final files are:', final_fasta, final_tsv, sep='\n\t', end='\n\n')
         print('You can use them with ARIBA like this:')
         print('ariba prepareref -f', final_fasta, '-m', final_tsv, 'output_directory\n')
@@ -358,6 +378,8 @@ class RefGenesGetter:
 
         pyfastaq.utils.close(f_out_fa)
         pyfastaq.utils.close(f_out_meta)
+        if not self.debug:
+            os.unlink(srst2_fa)
 
         print('Finished downloading and converting data. Final files are:', final_fasta, final_tsv, sep='\n\t', end='\n\n')
         print('You can use them with ARIBA like this:')
@@ -390,7 +412,8 @@ class RefGenesGetter:
         print('Extracting files ... ', end='', flush=True)
         vparser = vfdb_parser.VfdbParser(zipfile, outprefix)
         vparser.run()
-        shutil.rmtree(tmpdir)
+        if not self.debug:
+            shutil.rmtree(tmpdir)
         print('done')
         final_fasta = outprefix + '.fa'
         final_tsv = outprefix + '.tsv'
diff --git a/ariba/ref_preparer.py b/ariba/ref_preparer.py
index c2cb310..ecacd65 100644
--- a/ariba/ref_preparer.py
+++ b/ariba/ref_preparer.py
@@ -1,8 +1,9 @@
 import sys
 import os
+import shutil
 import pickle
 import pyfastaq
-from ariba import reference_data
+from ariba import reference_data, mash
 
 class Error (Exception): pass
 
@@ -23,6 +24,7 @@ class RefPreparer:
         clusters_file=None,
         threads=1,
         verbose=False,
+        force=False,
     ):
         self.extern_progs = extern_progs
 
@@ -43,6 +45,7 @@ class RefPreparer:
         self.clusters_file = clusters_file
         self.threads = threads
         self.verbose = verbose
+        self.force = force
 
 
     @classmethod
@@ -136,6 +139,9 @@ class RefPreparer:
     def run(self, outdir):
         original_dir = os.getcwd()
 
+        if self.force and os.path.exists(outdir):
+            shutil.rmtree(outdir)
+
         if os.path.exists(outdir):
             raise Error('Error! Output directory ' + outdir + ' already exists. Cannot continue')
 
@@ -204,3 +210,8 @@ class RefPreparer:
         with open(clusters_pickle_file, 'wb') as f:
             pickle.dump(clusters, f)
 
+        if self.verbose:
+            print('\nMash-sketching all reference sequences', flush=True)
+
+        mash.Masher.sketch(os.path.join(outdir, '02.cdhit.all.fa'), True, self.extern_progs, self.verbose, sys.stdout)
+
diff --git a/ariba/reference_data.py b/ariba/reference_data.py
index 2ddbac0..69c5755 100644
--- a/ariba/reference_data.py
+++ b/ariba/reference_data.py
@@ -15,6 +15,7 @@ class ReferenceData:
     def __init__(self,
         fasta_files,
         metadata_tsv_files,
+        rename_file=None,
         min_gene_length=6,
         max_gene_length=10000,
         genetic_code=11,
@@ -32,6 +33,22 @@ class ReferenceData:
         pyfastaq.sequences.genetic_code = self.genetic_code
         self.rename_dict = None
 
+        if rename_file is None or not os.path.exists(rename_file):
+            self.ariba_to_original_name = {}
+        else:
+            self.ariba_to_original_name = ReferenceData._load_rename_file(rename_file)
+
+
+    @classmethod
+    def _load_rename_file(cls, filename):
+        ariba_name_to_original_name = {}
+        f = pyfastaq.utils.open_file_read(filename)
+        for line in f:
+            original_name, ariba_name = line.rstrip().split('\t')
+            ariba_name_to_original_name[ariba_name] = original_name
+        pyfastaq.utils.close(f)
+        return ariba_name_to_original_name
+
 
     @classmethod
     def _load_metadata_tsv(cls, filename, metadata_dict):
@@ -356,7 +373,6 @@ class ReferenceData:
     def rename_sequences(self, outfile):
         self.rename_dict = ReferenceData._seq_names_to_rename_dict(self.sequences.keys())
         if len(self.rename_dict):
-            print('Had to rename some sequences. See', outfile, 'for old -> new names', file=sys.stderr)
             with open(outfile, 'w') as f:
                 for old_name, new_name in sorted(self.rename_dict.items()):
                     print(old_name, new_name, sep='\t', file=f)
diff --git a/ariba/report.py b/ariba/report.py
index 324124a..bbe215c 100644
--- a/ariba/report.py
+++ b/ariba/report.py
@@ -1,40 +1,43 @@
 import copy
+import re
 import sys
 import pymummer
+from ariba import sequence_variant
 
 class Error (Exception): pass
 
 columns = [
-    'ref_name',              # 0  name of reference sequence
-    'gene',                  # 1  is a gene 0|1
-    'var_only',              # 2  is variant only 0|1
-    'flag',                  # 3  cluster flag
-    'reads',                 # 4  number of reads in this cluster
-    'cluster',               # 5  name of cluster
-    'ref_len',               # 6  length of reference sequence
-    'ref_base_assembled',    # 7  number of reference nucleotides assembled by this contig
-    'pc_ident',              # 8  %identity between ref sequence and contig
-    'ctg',                   # 9  name of contig matching reference
-    'ctg_len',               # 10  length of contig matching reference
-    'ctg_cov',               # 11 mean mapped read depth of this contig
-    'known_var',             # 12 is this a known SNP from reference metadata? 1|0
-    'var_type',              # 13 The type of variant. Currently only SNP supported
-    'var_seq_type',          # 14 if known_var=1, n|p for nucleotide or protein
-    'known_var_change',      # 15 if known_var=1, the wild/variant change, eg I42L
-    'has_known_var',         # 16 if known_var=1, 1|0 for whether or not the assembly has the variant
-    'ref_ctg_change',        # 17 amino acid or nucleotide change between reference and contig, eg I42L
-    'ref_ctg_effect',        # 18 effect of change between reference and contig, eg SYS, NONSYN (amino acid changes only)
-    'ref_start',             # 19 start position of variant in contig
-    'ref_end',               # 20 end position of variant in contig
-    'ref_nt',                # 21 nucleotide(s) in contig at variant position
-    'ctg_start',             # 22 start position of variant in contig
-    'ctg_end',               # 23 end position of variant in contig
-    'ctg_nt',                # 24 nucleotide(s) in contig at variant position
-    'smtls_total_depth',     # 25 total read depth at variant start position in contig, reported by mpileup
-    'smtls_alt_nt',          # 26 alt nucleotides on contig, reported by mpileup
-    'smtls_alt_depth',       # 27 alt depth on contig, reported by mpileup
-    'var_description',       # 28 description of variant from reference metdata
-    'free_text',             # 29 other free text about reference sequence, from reference metadata
+    'ariba_ref_name',        # 0  ariba (renamed) name of reference sequence
+    'ref_name',              # 1  original name of ref sequence
+    'gene',                  # 2  is a gene 0|1
+    'var_only',              # 3  is variant only 0|1
+    'flag',                  # 4  cluster flag
+    'reads',                 # 5  number of reads in this cluster
+    'cluster',               # 6  name of cluster
+    'ref_len',               # 7  length of reference sequence
+    'ref_base_assembled',    # 8  number of reference nucleotides assembled by this contig
+    'pc_ident',              # 9  %identity between ref sequence and contig
+    'ctg',                   # 10  name of contig matching reference
+    'ctg_len',               # 11  length of contig matching reference
+    'ctg_cov',               # 12 mean mapped read depth of this contig
+    'known_var',             # 13 is this a known SNP from reference metadata? 1|0
+    'var_type',              # 14 The type of variant. Currently only SNP supported
+    'var_seq_type',          # 15 if known_var=1, n|p for nucleotide or protein
+    'known_var_change',      # 16 if known_var=1, the wild/variant change, eg I42L
+    'has_known_var',         # 17 if known_var=1, 1|0 for whether or not the assembly has the variant
+    'ref_ctg_change',        # 18 amino acid or nucleotide change between reference and contig, eg I42L
+    'ref_ctg_effect',        # 19 effect of change between reference and contig, eg SYN, NONSYN (amino acid changes only)
+    'ref_start',             # 20 start position of variant in reference
+    'ref_end',               # 21 end position of variant in reference
+    'ref_nt',                # 22 nucleotide(s) in reference at variant position
+    'ctg_start',             # 23 start position of variant in contig
+    'ctg_end',               # 24 end position of variant in contig
+    'ctg_nt',                # 25 nucleotide(s) in contig at variant position
+    'smtls_total_depth',     # 26 total read depth at variant start position in contig, reported by mpileup
+    'smtls_nts',             # 27 nucleotides on contig (mpileup ref base followed by any alt bases), reported by mpileup
+    'smtls_nts_depth',       # 28 depths of the nucleotides listed in smtls_nts, reported by mpileup
+    'var_description',       # 29 description of variant from reference metadata
+    'free_text',             # 30 other free text about reference sequence, from reference metadata
 ]
 
 
@@ -53,8 +56,8 @@ var_columns = [
     'ctg_end',
     'ctg_nt',
     'smtls_total_depth',
-    'smtls_alt_nt',
-    'smtls_alt_depth',
+    'smtls_nts',
+    'smtls_nts_depth',
     'var_description',
 ]
 
@@ -91,11 +94,12 @@ def _samtools_depths_at_known_snps_all_wild(sequence_meta, contig_name, cluster,
     if ref_nuc_range is None:
         return None
 
+    bases = []
     ctg_nts = []
     ref_nts = []
     smtls_total_depths = []
-    smtls_alt_nts = []
-    smtls_alt_depths = []
+    smtls_nts = []
+    smtls_depths = []
     contig_positions = []
 
     for ref_position in range(ref_nuc_range[0], ref_nuc_range[1]+1, 1):
@@ -106,17 +110,19 @@ def _samtools_depths_at_known_snps_all_wild(sequence_meta, contig_name, cluster,
             ref_nts.append(cluster.ref_sequence[ref_position])
             contig_position, in_indel = nucmer_match.qry_coords_from_ref_coord(ref_position, variant_list)
             contig_positions.append(contig_position)
-            ref, alt, total_depth, alt_depths = cluster.samtools_vars.get_depths_at_position(contig_name, contig_position)
-            ctg_nts.append(ref)
-            smtls_alt_nts.append(alt)
+            bases, total_depth, base_depths = cluster.samtools_vars.get_depths_at_position(contig_name, contig_position)
+            #ctg_nts.append(ref)
+            #samtools_nts.append(bases)
+            ctg_nts.append(cluster.assembly.sequences[contig_name][contig_position])
+            smtls_nts.append(bases)
             smtls_total_depths.append(total_depth)
-            smtls_alt_depths.append(alt_depths)
+            smtls_depths.append(base_depths)
 
     ctg_nts = ';'.join(ctg_nts) if len(ctg_nts) else '.'
     ref_nts = ';'.join(ref_nts) if len(ref_nts) else '.'
-    smtls_alt_nts = ';'.join(smtls_alt_nts) if len(smtls_alt_nts) else '.'
+    smtls_nts = ';'.join(smtls_nts) if len(smtls_nts) else '.'
     smtls_total_depths = ';'.join([str(x)for x in smtls_total_depths]) if len(smtls_total_depths) else '.'
-    smtls_alt_depths = ';'.join([str(x)for x in smtls_alt_depths]) if len(smtls_alt_depths) else '.'
+    smtls_depths = ';'.join([str(x)for x in smtls_depths]) if len(smtls_depths) else '.'
     ctg_start = str(min(contig_positions) + 1) if contig_positions is not None else '.'
     ctg_end = str(max(contig_positions) + 1) if contig_positions is not None else '.'
 
@@ -128,8 +134,8 @@ def _samtools_depths_at_known_snps_all_wild(sequence_meta, contig_name, cluster,
         ctg_end,
         ctg_nts,
         smtls_total_depths,
-        smtls_alt_nts,
-        smtls_alt_depths
+        smtls_nts,
+        smtls_depths
     ]]
 
 
@@ -149,6 +155,7 @@ def _report_lines_for_one_contig(cluster, contig_name, ref_cov_per_contig, pymum
 
     common_first_columns = [
         cluster.ref_sequence.id,
+        cluster.refdata.ariba_to_original_name.get(cluster.ref_sequence.id, cluster.ref_sequence.id),
         cluster.is_gene,
         cluster.is_variant_only,
         str(cluster.status_flag),
@@ -193,33 +200,86 @@ def _report_lines_for_one_contig(cluster, contig_name, ref_cov_per_contig, pymum
             if contributing_vars is None:
                 samtools_columns = [['.'] * 9]
             else:
-                contributing_vars.sort(key = lambda x: x.qry_start)
+                if var_effect in ['INDELS', 'MULTIPLE']:
+                    ref_start_pos = min([x.ref_start for x in contributing_vars])
+                    ref_end_pos = max([x.ref_start for x in contributing_vars])
+                    ctg_start_pos = min([x.qry_start for x in contributing_vars])
+                    ctg_end_pos = max([x.qry_start for x in contributing_vars])
+                else:
+                    ref_start_pos = 3 * position if cluster.is_gene == '1' else position
+                    assert contig_name in cluster.assembly_compare.nucmer_hits
+                    ref_start_hit = None
+                    for hit in cluster.assembly_compare.nucmer_hits[contig_name]:
+                        if hit.ref_name == cluster.ref_sequence.id and hit.ref_coords().distance_to_point(ref_start_pos) == 0:
+                            ref_start_hit = copy.copy(hit)
+                            break
+
+                    assert ref_start_hit is not None
+                    ctg_start_pos, ctg_start_in_indel = ref_start_hit.qry_coords_from_ref_coord(ref_start_pos, pymummer_variants)
+
+                    if known_var_change not in  ['.', 'unknown']:
+                        regex = re.match('^([^0-9]+)([0-9]+)([^0-9]+)$', known_var_change)
+                        try:
+                            ref_var_string, ref_var_position, ctg_var_string = regex.group(1, 2, 3)
+                        except:
+                            raise Error('Error parsing variant ' + known_var_change)
+                    elif ref_ctg_change != '.':
+                        if '_' in ref_ctg_change:
+                            regex = re.match('^([^0-9]+)([0-9]+)_[^0-9]+[0-9]+([^0-9]+)$', ref_ctg_change)
+                            try:
+                                ref_var_string, ref_var_position, ctg_var_string = regex.group(1, 2, 3)
+                            except:
+                                raise Error('Error parsing variant ' + ref_ctg_change)
+                        else:
+                            regex = re.match('^([^0-9]+)([0-9]+)([^0-9]+)$', ref_ctg_change)
+                            try:
+                                ref_var_string, ref_var_position, ctg_var_string = regex.group(1, 2, 3)
+                            except:
+                                raise Error('Error parsing variant ' + ref_ctg_change)
+                    else:
+                        assert var_effect == 'SYN'
+
+                    if var_effect == 'SYN':
+                        ref_end_pos = ref_start_pos + 2
+                        ctg_end_pos = ctg_start_pos + 2
+                    elif ref_var_string == '.' or var_effect in {'INS', 'DEL', 'FSHIFT', 'TRUNC', 'INDELS', 'UNKNOWN'}:
+                        ref_end_pos = ref_start_pos
+                        ctg_end_pos = ctg_start_pos
+                    elif cluster.is_gene == '1':
+                        ref_end_pos = ref_start_pos + 3 * len(ref_var_string) - 1
+                        ctg_end_pos = ctg_start_pos + 3 * len(ctg_var_string) - 1
+                    else:
+                        ref_end_pos = ref_start_pos + len(ref_var_string) - 1
+                        ctg_end_pos = ctg_start_pos + len(ctg_var_string) - 1
 
                 smtls_total_depth = []
                 smtls_alt_nt = []
                 smtls_alt_depth = []
 
-                for var in contributing_vars:
+                for qry_pos in range(ctg_start_pos, ctg_end_pos + 1, 1):
                     if contig_name in remaining_samtools_variants:
-                        remaining_samtools_variants[contig_name].discard(var.qry_start)
+                        try:
+                            remaining_samtools_variants[contig_name].discard(qry_pos)
+                        except:
+                            pass
 
-                    depths_tuple = cluster.samtools_vars.get_depths_at_position(contig_name, var.qry_start)
+                    depths_tuple = cluster.samtools_vars.get_depths_at_position(contig_name, qry_pos)
 
                     if depths_tuple is not None:
-                        smtls_alt_nt.append(depths_tuple[1])
-                        smtls_total_depth.append(str(depths_tuple[2]))
-                        smtls_alt_depth.append(str(depths_tuple[3]))
+                        smtls_alt_nt.append(depths_tuple[0])
+                        smtls_total_depth.append(str(depths_tuple[1]))
+                        smtls_alt_depth.append(str(depths_tuple[2]))
 
                 smtls_total_depth = ';'.join(smtls_total_depth) if len(smtls_total_depth) else '.'
                 smtls_alt_nt = ';'.join(smtls_alt_nt) if len(smtls_alt_nt) else '.'
                 smtls_alt_depth = ';'.join(smtls_alt_depth) if len(smtls_alt_depth) else '.'
                 samtools_columns = [
-                        str(contributing_vars[0].ref_start + 1), #ref_start
-                        str(contributing_vars[0].ref_end + 1), # ref_end
-                        ';'.join([x.ref_base for x in contributing_vars]), # ref_nt
-                        str(contributing_vars[0].qry_start + 1),  # ctg_start
-                        str(contributing_vars[0].qry_end + 1),  #ctg_end
-                        ';'.join([x.qry_base for x in contributing_vars]), #ctg_nt
+                        str(ref_start_pos + 1), #ref_start
+                        str(ref_end_pos + 1), # ref_end
+                        cluster.ref_sequence[ref_start_pos:ref_end_pos+1],
+                        str(ctg_start_pos + 1),  # ctg_start
+                        str(ctg_end_pos + 1),  #ctg_end
+                        cluster.assembly.sequences[contig_name][ctg_start_pos:ctg_end_pos + 1], # ctg_nt
                         smtls_total_depth,
                         smtls_alt_nt,
                         smtls_alt_depth,
@@ -230,6 +290,8 @@ def _report_lines_for_one_contig(cluster, contig_name, ref_cov_per_contig, pymum
                 for matching_var in matching_vars_set:
                     if contributing_vars is None:
                         samtools_columns = _samtools_depths_at_known_snps_all_wild(matching_var, contig_name, cluster, pymummer_variants)
+                        samtools_columns[2] = samtools_columns[2].replace(';', '')
+                        samtools_columns[5] = samtools_columns[5].replace(';', '')
                     reported_known_vars.add(str(matching_var.variant))
                     variant_columns[3] = str(matching_var.variant)
 
@@ -271,7 +333,9 @@ def _report_lines_for_one_contig(cluster, contig_name, ref_cov_per_contig, pymum
                     var_string = None
                 else:
                     ref_nt = cluster.ref_sequence[ref_coord]
-                    var_string = depths_tuple[0] + str(ref_coord + 1) + depths_tuple[1]
+                    ctg_nt = cluster.assembly.sequences[contig_name][var_position]
+                    alt_strings = [x for x in depths_tuple[0].split(',') if x != ctg_nt]
+                    var_string = ctg_nt + str(ref_coord + 1) + ','.join(alt_strings)
                     ref_coord = str(ref_coord + 1)
 
                 if var_string not in reported_known_vars:
@@ -280,10 +344,10 @@ def _report_lines_for_one_contig(cluster, contig_name, ref_cov_per_contig, pymum
                         'HET', # var_type
                         '.', '.', '.', var_string, '.', ref_coord, ref_coord, ref_nt, # var_seq_type ... ref_nt
                         str(var_position + 1), str(var_position + 1), # ctg_start, ctg_end
-                        depths_tuple[0], # ctg_nt
-                        str(depths_tuple[2]), # smtls_total_depth
-                        depths_tuple[1], # smtls_alt_nt
-                        str(depths_tuple[3]), # smtls_alt_depth
+                        ctg_nt, # ctg_nt
+                        str(depths_tuple[1]), # smtls_total_depth
+                        depths_tuple[0], # smtls_alt_nt
+                        str(depths_tuple[2]), # smtls_alt_depth
                         '.',
                         free_text_column,
                     ]
@@ -297,11 +361,11 @@ def _report_lines_for_one_contig(cluster, contig_name, ref_cov_per_contig, pymum
 
 def report_lines(cluster):
     if cluster.status_flag.has('ref_seq_choose_fail'):
-        fields = ['.', '.', '.', str(cluster.status_flag), str(cluster.total_reads), cluster.name] + ['.'] * (len(columns) - 6)
+        fields = ['.', '.', '.', '.', str(cluster.status_flag), str(cluster.total_reads), cluster.name] + ['.'] * (len(columns) - 7)
         assert len(fields) == len(columns)
         return ['\t'.join(fields)]
     elif cluster.status_flag.has('assembly_fail'):
-        fields = ['.', '.', '.', str(cluster.status_flag), str(cluster.total_reads), cluster.name] + ['.'] * (len(columns) - 6)
+        fields = ['.', '.', '.', '.', str(cluster.status_flag), str(cluster.total_reads), cluster.name] + ['.'] * (len(columns) - 7)
         assert len(fields) == len(columns)
         return ['\t'.join(fields)]
 
diff --git a/ariba/samtools_variants.py b/ariba/samtools_variants.py
index a7c58be..84c97fb 100644
--- a/ariba/samtools_variants.py
+++ b/ariba/samtools_variants.py
@@ -76,7 +76,8 @@ class SamtoolsVariants:
 
         if len(rows) == 1:
             r, p, ref_base, alt_base, ref_counts, alt_counts = rows[0].rstrip().split()
-            return ref_base, alt_base, int(ref_counts), alt_counts
+            bases = ref_base if alt_base == '.' else ref_base + ',' + alt_base
+            return bases, int(ref_counts), alt_counts
         else:
             return None
 
@@ -161,7 +162,7 @@ class SamtoolsVariants:
         if seq_name in d and position in d[seq_name]:
             return d[seq_name][position]
         else:
-            return 'ND', 'ND', 'ND', 'ND'
+            return 'ND', 'ND', 'ND'
 
 
     def run(self):
diff --git a/ariba/summary.py b/ariba/summary.py
index 3d2b349..fc0a2b0 100644
--- a/ariba/summary.py
+++ b/ariba/summary.py
@@ -126,13 +126,15 @@ class Summary:
                             if variant.var_group not in seen_groups:
                                 seen_groups[variant.var_group] = {'yes': 0, 'het': 0}
 
-                            if variant.het_percent is None:
-                                seen_groups[variant.var_group]['yes'] += 1
-                                this_cluster_dict['groups'][variant.var_group] = 'yes'
-                            else:
+                            if variant.het_percent is not None:
+                                this_cluster_dict['groups'][variant.var_group + '.%'] = variant.het_percent
+
+                            if variant.is_het:
                                 seen_groups[variant.var_group]['het'] += 1
                                 this_cluster_dict['groups'][variant.var_group] = 'het'
-                                this_cluster_dict['groups'][variant.var_group + '.%'] = variant.het_percent
+                            else:
+                                seen_groups[variant.var_group]['yes'] += 1
+                                this_cluster_dict['groups'][variant.var_group] = 'yes'
 
                     for group, d in seen_groups.items():
                         if d['het'] > 0 and d['het'] + d['yes'] > 1:
@@ -254,6 +256,7 @@ class Summary:
             'het': '#fdbf6f',
             'fragmented': '#1f78b4',
             'interrupted': '#a6cee3',
+            'partial': '#fdbf6f',
         }
 
         cols_to_add_colour_col.reverse()
@@ -272,7 +275,8 @@ class Summary:
     @classmethod
     def _matrix_to_csv(cls, matrix, header, outfile, remove_nas=False):
         f = pyfastaq.utils.open_file_write(outfile)
-        print(*header, sep=',', file=f)
+        fixed_header = [x.replace(',', '/') for x in header]
+        print(*fixed_header, sep=',', file=f)
         for line in matrix:
             if remove_nas:
                 new_line = ['' if x=='NA' else x for x in line]
@@ -284,6 +288,10 @@ class Summary:
 
     @staticmethod
     def _distance_score_between_values(value1, value2):
+        if value1 == 'partial':
+            value1 = 'no'
+        if value2 == 'partial':
+            value2 = 'no'
         value_set = {value1, value2}
         if value_set.isdisjoint(required_keys_for_difference) or value1 == value2 or value_set == {'NA', 'no'}:
             return 0
diff --git a/ariba/summary_cluster.py b/ariba/summary_cluster.py
index efc4cf8..f3f952c 100644
--- a/ariba/summary_cluster.py
+++ b/ariba/summary_cluster.py
@@ -96,6 +96,14 @@ class SummaryCluster:
         return identity
 
 
+    def _has_any_part_of_ref_assembled(self):
+        for d in self.data:
+            if isinstance(d['ref_base_assembled'], int) and d['ref_base_assembled'] > 0:
+                return True
+
+        return False
+
+
     def _to_cluster_summary_assembled(self):
         if len(self.data) == 0:
             return 'no'
@@ -105,10 +113,10 @@ class SummaryCluster:
         else:
             has_complete_gene = self.flag.has('complete_gene')
 
-        if self.flag.has('assembly_fail') or \
-          (not self.flag.has('assembled')) or \
-          self.flag.has('ref_seq_choose_fail'):
+        if self.flag.has('assembly_fail') or self.flag.has('ref_seq_choose_fail'):
             return 'no'
+        elif not self.flag.has('assembled'):
+            return 'partial' if self._has_any_part_of_ref_assembled() else 'no'
         elif self.flag.has('assembled_into_one_contig') and has_complete_gene:
             if self.flag.has('unique_contig') and \
               (not self.flag.has('scaffold_graph_bad')) and \
@@ -126,42 +134,70 @@ class SummaryCluster:
 
     @classmethod
     def _has_known_variant(cls, data_dict):
-        return data_dict['has_known_var'] == '1'
+        if data_dict['has_known_var'] == '1':
+            return 'yes'
+        elif data_dict['known_var'] == '0':
+            return 'no'
+        elif data_dict['gene'] == '1': # we don't yet call hets in genes
+            return 'no'
+        else:
+            cluster_var = summary_cluster_variant.SummaryClusterVariant(data_dict)
+            return 'het' if cluster_var.is_het else 'no'
 
 
     def _has_any_known_variant(self):
-        for d in self.data:
-            if self._has_known_variant(d):
-                return 'yes'
-        return 'no'
+        results = {self._has_known_variant(d) for d in self.data}
+        if 'yes' in results:
+            return 'yes'
+        else:
+            return 'het' if 'het' in results else 'no'
 
 
     @classmethod
     def _has_nonsynonymous(cls, data_dict):
-        return data_dict['ref_ctg_effect'] != 'SYN' and \
-          (
-              data_dict['has_known_var'] == '1' or \
-              (data_dict['known_var'] != '1' and (data_dict['ref_ctg_change'] != '.' or data_dict['ref_ctg_effect'] != '.'))
-          )
+        cluster_var = summary_cluster_variant.SummaryClusterVariant(data_dict)
+
+        has_non_het = data_dict['ref_ctg_effect'] != 'SYN' and \
+        (
+            data_dict['has_known_var'] == '1' or \
+            (data_dict['known_var'] != '1' and (data_dict['ref_ctg_change'] != '.' or data_dict['ref_ctg_effect'] != '.'))
+        )
+
+        if has_non_het and not cluster_var.is_het:
+            return 'yes'
+        else:
+            return 'het' if cluster_var.is_het else 'no'
 
 
     def _has_any_nonsynonymous(self):
-        for d in self.data:
-            if self._has_nonsynonymous(d):
-                return 'yes'
-        return 'no'
+        results = {SummaryCluster._has_nonsynonymous(d) for d in self.data}
+
+        if 'yes' in results:
+            return 'yes'
+        else:
+            return 'het' if 'het' in results else 'no'
 
 
     @classmethod
     def _has_novel_nonsynonymous(cls, data_dict):
-        return SummaryCluster._has_nonsynonymous(data_dict) and not SummaryCluster._has_known_variant(data_dict)
+        has_nonsynon = SummaryCluster._has_nonsynonymous(data_dict)
+        if has_nonsynon == 'no':
+            return 'no'
+        else:
+            has_known = SummaryCluster._has_known_variant(data_dict)
+            if has_known == 'no':
+                return has_nonsynon
+            else:
+                return 'no'
 
 
     def _has_any_novel_nonsynonymous(self):
-        for d in self.data:
-            if self._has_novel_nonsynonymous(d):
-                return 'yes'
-        return 'no'
+        results = {SummaryCluster._has_novel_nonsynonymous(d) for d in self.data}
+
+        if 'yes' in results:
+            return 'yes'
+        else:
+            return 'het' if 'het' in results else 'no'
 
 
     def _to_cluster_summary_has_known_nonsynonymous(self, assembled_summary):
@@ -198,12 +234,12 @@ class SummaryCluster:
             return None
 
         if data_dict['known_var'] == '1' and data_dict['ref_ctg_effect'] == 'SNP' \
-          and data_dict['smtls_alt_nt'] != '.' and ';' not in data_dict['smtls_alt_nt']:
-            nucleotides = [data_dict['ctg_nt']] + data_dict['smtls_alt_nt'].split(',')
-            depths = data_dict['smtls_alt_depth'].split(',')
+          and data_dict['smtls_nts'] != '.' and ';' not in data_dict['smtls_nts']:
+            nucleotides = data_dict['smtls_nts'].split(',')
+            depths = data_dict['smtls_nts_depth'].split(',')
 
             if len(nucleotides) != len(depths):
-                raise Error('Mismatch in number of inferred nucleotides from ctg_nt, smtls_alt_nt, smtls_alt_depth columns. Cannot continue\n' + str(data_dict))
+                raise Error('Mismatch in number of inferred nucleotides from ctg_nt, smtls_nts, smtls_nts_depth columns. Cannot continue\n' + str(data_dict))
 
             try:
                 var_nucleotide = data_dict['known_var_change'][-1]
@@ -220,14 +256,13 @@ class SummaryCluster:
             return None
 
 
-
     @staticmethod
     def _get_nonsynonymous_var(data_dict):
         '''if data_dict has a non synonymous variant, return string:
         ref_name.change. Otherwise return None'''
         has_nonsyn = SummaryCluster._has_nonsynonymous(data_dict)
 
-        if not has_nonsyn:
+        if has_nonsyn == 'no':
             return None
         elif data_dict['known_var_change'] == data_dict['ref_ctg_change'] == '.' == data_dict['ref_ctg_effect']:
             raise Error('Unexpected data in ariba summary... \n' + str(data_dict) + '\n... known_var_change, ref_ctg_change, ref_ctg_effect all equal to ".", but has a non synonymous change. Something is inconsistent. Cannot continue')
@@ -251,6 +286,7 @@ class SummaryCluster:
 
             return (data_dict['ref_name'], var_change) + var_group
 
+
     def _has_match(self, assembled_summary):
         '''assembled_summary should be output of _to_cluster_summary_assembled'''
         if assembled_summary.startswith('yes'):
@@ -266,7 +302,7 @@ class SummaryCluster:
         '''Returns a set of the variant group ids that this cluster has'''
         ids = set()
         for d in self.data:
-            if self._has_known_variant(d) and d['var_group'] != '.':
+            if self._has_known_variant(d) != 'no' and d['var_group'] != '.':
                 ids.add(d['var_group'])
         return ids
 
@@ -298,7 +334,10 @@ class SummaryCluster:
         for d in self.data:
             snp_tuple = self._get_known_noncoding_het_snp(d)
             if snp_tuple is not None:
-                snp_id = d['var_description'].split(':')[4]
+                try:
+                    snp_id = d['var_description'].split(':')[4]
+                except:
+                    raise Error('Error getting ID from ' + str(d) + '\n' + d['var_description'])
                 if snp_id not in snps:
                     snps[snp_id] = {}
                 snps[snp_id][snp_tuple[0]] = snp_tuple[1]
@@ -311,7 +350,7 @@ class SummaryCluster:
 
         for data_dict in data_dicts:
             cluster_var = summary_cluster_variant.SummaryClusterVariant(data_dict)
-            if cluster_var.has_nonsynon:
+            if cluster_var.has_nonsynon or cluster_var.is_het:
                 variants.add(cluster_var)
 
         return variants
diff --git a/ariba/summary_cluster_variant.py b/ariba/summary_cluster_variant.py
index a0c30a9..62c231a 100644
--- a/ariba/summary_cluster_variant.py
+++ b/ariba/summary_cluster_variant.py
@@ -2,6 +2,11 @@ class Error (Exception): pass
 
 class SummaryClusterVariant:
     def __init__(self, data_dict):
+        self.known = None
+        self.var_group = None
+        self.coding = None
+        self.var_string = None
+        self.het_percent = None
         self._get_nonsynon_variant_data(data_dict)
 
 
@@ -14,10 +19,7 @@ class SummaryClusterVariant:
 
 
     def __str__(self):
-        if self.has_nonsynon:
-            return ', '.join((str(self.known), self.var_group, str(self.coding), self.var_string, str(self.het_percent)))
-        else:
-            return 'None'
+        return ', '.join((str(self.known), self.var_group, str(self.coding), self.var_string, str(self.het_percent)))
 
 
     @classmethod
@@ -43,22 +45,30 @@ class SummaryClusterVariant:
 
     @classmethod
     def _get_is_het_and_percent(cls, data_dict):
-        if data_dict['gene'] == '1' or not (data_dict['ref_ctg_effect'] == 'SNP' or data_dict['var_type'] == 'HET') or data_dict['smtls_alt_nt'] == '.' or ';' in data_dict['smtls_alt_nt'] or data_dict['smtls_alt_depth'] == 'ND':
+        if data_dict['gene'] == '1' or not (data_dict['known_var'] == '1' or data_dict['ref_ctg_effect'] == 'SNP' or data_dict['var_type'] == 'HET') or data_dict['smtls_nts'] == '.' or ';' in data_dict['smtls_nts'] or data_dict['smtls_nts_depth'] == 'ND':
             return False, None
         else:
-            nucleotides = [data_dict['ctg_nt']] + data_dict['smtls_alt_nt'].split(',')
-            depths = data_dict['smtls_alt_depth'].split(',')
+            nucleotides = data_dict['smtls_nts'].split(',')
+            depths = data_dict['smtls_nts_depth'].split(',')
 
             if len(nucleotides) != len(depths):
-                raise Error('Mismatch in number of inferred nucleotides from ctg_nt, smtls_alt_nt, smtls_alt_depth columns. Cannot continue\n' + str(data_dict))
+                raise Error('Mismatch in number of inferred nucleotides from ctg_nt, smtls_nts, smtls_nts_depth columns. Cannot continue\n' + str(data_dict))
 
             try:
-                is_het = False
+                depths = [int(x) for x in depths]
+                nuc_to_depth = dict(zip(nucleotides, depths))
 
                 if data_dict['ref_ctg_change'] != '.':
                     var_nucleotide = data_dict['ref_ctg_change'][-1]
                 elif data_dict['var_type'] == 'HET':
-                    var_nucleotide = data_dict['smtls_alt_nt']
+                    var_nucleotide = '.'
+                    best_depth = -1
+                    for nuc in nuc_to_depth:
+                        if nuc == data_dict['ctg_nt']:
+                            continue
+                        elif nuc_to_depth[nuc] > best_depth:
+                            var_nucleotide = nuc
+                            best_depth = nuc_to_depth[nuc]
                 elif data_dict['known_var_change'] != '.':
                     var_nucleotide = data_dict['known_var_change'][-1]
                 else:
@@ -66,15 +76,13 @@ class SummaryClusterVariant:
 
                 if var_nucleotide == '.':
                     return False, None
-                depths = [int(x) for x in depths]
-                nuc_to_depth = dict(zip(nucleotides, depths))
                 total_depth = sum(depths)
-                var_depth = nuc_to_depth.get(var_nucleotide, 0)
-
-                if data_dict['var_type'] == 'HET':
-                    is_het = True
+                if max([len(x) for x in nucleotides]) == 1:
+                    var_depth = nuc_to_depth.get(var_nucleotide, 0)
                 else:
-                    is_het = SummaryClusterVariant._depths_look_het(depths)
+                    var_depth = sum([nuc_to_depth[x] for x in nuc_to_depth if x[0] == var_nucleotide])
+
+                is_het = SummaryClusterVariant._depths_look_het(depths)
 
                 return is_het, round(100 * var_depth / total_depth, 1)
             except:
@@ -82,6 +90,19 @@ class SummaryClusterVariant:
 
 
     def _get_nonsynon_variant_data(self, data_dict):
+        self.known = data_dict['known_var'] == '1'
+        self.coding = data_dict['gene'] == '1'
+        self.var_group = data_dict['var_group']
+
+        if data_dict['known_var'] == '1' and data_dict['known_var_change'] != '.':
+            self.var_string = data_dict['known_var_change']
+        elif data_dict['ref_ctg_change'] != '.':
+            self.var_string = data_dict['ref_ctg_change']
+        else:
+            self.var_string = data_dict['ref_ctg_effect']
+
+        self.is_het, self.het_percent = SummaryClusterVariant._get_is_het_and_percent(data_dict)
+
         if not SummaryClusterVariant._has_nonsynonymous(data_dict):
             self.has_nonsynon = False
             return
@@ -94,16 +115,4 @@ class SummaryClusterVariant:
           data_dict['known_var_change'] != data_dict['ref_ctg_change']:
             raise Error('Unexpected data in ariba summary... \n' + str(data_dict) + '\n... known_var_change != ref_ctg_change. Cannot continue')
 
-        self.known = data_dict['known_var'] == '1'
-        self.var_group = data_dict['var_group']
-        self.coding = data_dict['gene'] == '1'
-
-        if data_dict['known_var'] == '1' and data_dict['known_var_change'] != '.':
-            self.var_string = data_dict['known_var_change']
-        elif data_dict['ref_ctg_change'] != '.':
-            self.var_string = data_dict['ref_ctg_change']
-        else:
-            self.var_string = data_dict['ref_ctg_effect']
-
-        self.is_het, self.het_percent = SummaryClusterVariant._get_is_het_and_percent(data_dict)
 
diff --git a/ariba/tasks/getref.py b/ariba/tasks/getref.py
index d5cc618..b744454 100644
--- a/ariba/tasks/getref.py
+++ b/ariba/tasks/getref.py
@@ -5,7 +5,8 @@ from ariba import ref_genes_getter
 def run(options):
     getter = ref_genes_getter.RefGenesGetter(
         options.db,
-        version=options.version
+        version=options.version,
+        debug=options.debug
     )
     getter.run(options.outprefix)
 
diff --git a/ariba/tasks/prepareref.py b/ariba/tasks/prepareref.py
index ef52684..29c699e 100644
--- a/ariba/tasks/prepareref.py
+++ b/ariba/tasks/prepareref.py
@@ -25,6 +25,7 @@ def run(options):
         clusters_file=options.cdhit_clusters,
         threads=options.threads,
         verbose=options.verbose,
+        force=options.force,
     )
 
     preparer.run(options.outdir)
diff --git a/ariba/tasks/run.py b/ariba/tasks/run.py
index 668b69d..db3bd32 100644
--- a/ariba/tasks/run.py
+++ b/ariba/tasks/run.py
@@ -1,5 +1,6 @@
 import argparse
 import os
+import shutil
 import sys
 import ariba
 
@@ -27,6 +28,9 @@ def run(options):
         print('Input directory', options.prepareref_dir, 'not found. Cannot continue', file=sys.stderr)
         sys.exit(1)
 
+    if options.force and os.path.exists(options.outdir):
+        shutil.rmtree(options.outdir)
+
     if os.path.exists(options.outdir):
         print('Output directory already exists. ARIBA makes the output directory. Cannot continue.', file=sys.stderr)
         sys.exit(1)
diff --git a/ariba/test_run_data/metadata.tsv b/ariba/test_run_data/metadata.tsv
index 9d24ab1..77a71d5 100644
--- a/ariba/test_run_data/metadata.tsv
+++ b/ariba/test_run_data/metadata.tsv
@@ -15,5 +15,6 @@ noncoding1	0	0	A14T	noncoding_group1	ref has wild type, reads have variant so sh
 noncoding1	0	0	A40C	.	ref has variant, reads have wild type so should not report
 noncoding2	0	0	.	.	generic description of noncoding2
 noncoding3	0	0	.	.	generic description of noncoding3
+noncoding4	0	0	.	.	generic description of noncoding4
 noncoding.var_only1	0	1	C6T	.	sample does not have this SNP
 noncoding.var_only2	0	1	T10A	.	sample does have this SNP
diff --git a/ariba/test_run_data/reads_1.fq b/ariba/test_run_data/reads_1.fq
index 7e34e6a..62ff409 100644
--- a/ariba/test_run_data/reads_1.fq
+++ b/ariba/test_run_data/reads_1.fq
@@ -2086,3 +2086,7 @@ IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
 GATCGTACTGAGGTGTCACGAACGACGGACCAGCGTCGTACGTACGTACGTACGTAGGAGACAGCAGCAAACTGACGTAATCTACCTGAT
 +
 IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding4:1:18:130/1
+CCTTAATTAAATCTGGCTAGCTTAACGCTATATCAGCCCGTTAGTGTACTAGCCGTAGCATAGTCTAATTTGACCGGGTGAATAGCAATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/test_run_data/reads_2.fq b/ariba/test_run_data/reads_2.fq
index f468576..d5a9376 100644
--- a/ariba/test_run_data/reads_2.fq
+++ b/ariba/test_run_data/reads_2.fq
@@ -2086,3 +2086,7 @@ IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
 GCGGGTTTTTGGCCCCTATAACAGTACGTACGTCATGCTGCTGCTCGTAGTATATATAATTAATATATAGTATGCAGTCAGTTGTCGCGA
 +
 IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding4:1:18:130/2
+CTCCGTAACTCGCCGTTCTGTGCCTATCACGAACCGAGCGCGATGATTTTCCTTTTATCCACCAGGTTGGTAATATTCCTTTTAAGAGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/test_run_data/ref_fasta_to_make_reads_from.fa b/ariba/test_run_data/ref_fasta_to_make_reads_from.fa
index 1bf715b..0294174 100644
--- a/ariba/test_run_data/ref_fasta_to_make_reads_from.fa
+++ b/ariba/test_run_data/ref_fasta_to_make_reads_from.fa
@@ -46,6 +46,11 @@ CTAACTTACTACTATGACTGACTGACTGACTGACTGATCGACTGCTGACATCTGATCGAT
 CATCCTGTCGACATCATATCTCGATCGATCGATCGACTGACTGACTGACTGACTGAATCT
 CACGTACTGACTCATCATCATCATACTCATCATATCATCGATCGATCATCTGATCTGATG
 ACGGGACGACCTCCGTTCGTGTCTGGCTGGTGTCAGCTTTGACTTAAGACGCGCCAAGCC
+>noncoding4
+AGAGATGATCTCTAGCTCCTTAATTAAATCTGGCTAGCTTAACGCTATATCAGCCCGTTA
+GTGTACTAGCCGTAGCATAGTCTAATTTGACCGGGTGAATAGCAATGAATTTCACCAGTC
+GCAACCAGCTGCTCTTAAAAGGAATATTACCAACCTGGTGGATAAAAGGAAAATCATCGC
+GCTCGGTTCGTGATAGGCACAGAACGGCGAGTTACGGAGCCCGGATGGTAGATCGGTTAA
 >noncoding.var_only1
 CTTGGGGTTCCTGTACTAAAATCACTACGAAAGAGCAACCGTCCCGTTTCAGAGCTAAGG
 CGTATCGACGTACTGACGTCGTATGCGTCGTCGACGTGTCCAGGCGCGCCGCGCGCCCCC
diff --git a/ariba/test_run_data/ref_seqs.fa b/ariba/test_run_data/ref_seqs.fa
index 8251d0a..9890fc3 100644
--- a/ariba/test_run_data/ref_seqs.fa
+++ b/ariba/test_run_data/ref_seqs.fa
@@ -19,6 +19,11 @@ GCGATACAAATCCCAAGTTTAGCGGACAGTTCACGCCGGGTTCTAAGAATGTATGCGTCC
 CTAACTTACTACTATGACTGACTGACTGACTGACTGATCGACTGCTGACATCTGATCGAT
 CATCCTGTCGACATCATATCTCGATCGATCGATCGACTGACTGACTGACTGACTGAATCT
 CACGTACTGACTCATCATCATCATACTCATCATATCATCGATCGATCATCTGATCTGATG
+>noncoding4
+AGAGATGATCTCTAGCTCCTTAATTAAATCTGGCTAGCTTAACGCTATATCAGCCCGTTA
+GTGTACTAGCCGTAGCATAGTCTAATTTGACCGGGTGAATAGCAATGAATTTCACCAGTC
+GCAACCAGCTGCTCTTAAAAGGAATATTACCAACCTGGTGGATAAAAGGAAAATCATCGC
+GCTCGGTTCGTGATAGGCACAGAACGGCGAGTTACGGAGCCCGGATGGTAGATCGGTTAA
 >noncoding.var_only1
 CGTATCATCGATCATCGTACGTACGTCGTCGTAGTCAGTCACGAGCAGAGAGAGAGGAGG
 CTATGCATGCATCATCTCACTGCGAGTCAGGAGAGAAGCCGTGTAGCGACGAGCGCAAAA
diff --git a/ariba/tests/assembly_test.py b/ariba/tests/assembly_test.py
index 7ac81b8..740234c 100644
--- a/ariba/tests/assembly_test.py
+++ b/ariba/tests/assembly_test.py
@@ -68,7 +68,7 @@ class TestAssembly(unittest.TestCase):
         tmp_log = 'tmp.test_assemble_with_fermilite.log'
         tmp_log_fh = open(tmp_log, 'w')
         print('First line', file=tmp_log_fh)
-        a = assembly.Assembly(reads1, reads2, 'not needed', 'not needed', tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', tmp_log_fh)
+        a = assembly.Assembly(reads1, reads2, 'not needed', 'not needed', tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', tmp_log_fh, 'not needed')
         a._assemble_with_fermilite()
         self.assertTrue(a.assembled_ok)
         tmp_log_fh.close()
@@ -87,7 +87,7 @@ class TestAssembly(unittest.TestCase):
         tmp_log = 'tmp.test_assemble_with_fermilite_fails.log'
         tmp_log_fh = open(tmp_log, 'w')
         print('First line', file=tmp_log_fh)
-        a = assembly.Assembly(reads1, reads2, 'not needed', 'not needed', tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', tmp_log_fh)
+        a = assembly.Assembly(reads1, reads2, 'not needed', 'not needed', tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', tmp_log_fh, 'not needed')
         a._assemble_with_fermilite()
         self.assertFalse(a.assembled_ok)
         tmp_log_fh.close()
@@ -104,7 +104,7 @@ class TestAssembly(unittest.TestCase):
         reads2 = os.path.join(data_dir, 'assembly_test_assemble_with_spades_reads_2.fq')
         ref_fasta = os.path.join(data_dir, 'assembly_test_assemble_with_spades_ref.fa')
         tmp_dir = 'tmp.test_assemble_with_spades'
-        a = assembly.Assembly(reads1, reads2, 'not needed', ref_fasta, tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout)
+        a = assembly.Assembly(reads1, reads2, 'not needed', ref_fasta, tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout, 'not needed')
         a._assemble_with_spades(unittest=True)
         self.assertTrue(a.assembled_ok)
         shutil.rmtree(tmp_dir)
@@ -117,7 +117,7 @@ class TestAssembly(unittest.TestCase):
         reads2 = os.path.join(data_dir, 'assembly_test_assemble_with_spades_reads_2.fq')
         ref_fasta = os.path.join(data_dir, 'assembly_test_assemble_with_spades_ref.fa')
         tmp_dir = 'tmp.test_assemble_with_spades'
-        a = assembly.Assembly(reads1, reads2, 'not needed', ref_fasta, tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout)
+        a = assembly.Assembly(reads1, reads2, 'not needed', ref_fasta, tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout, 'not needed')
         a._assemble_with_spades(unittest=False)
         self.assertFalse(a.assembled_ok)
         shutil.rmtree(tmp_dir)
@@ -130,7 +130,7 @@ class TestAssembly(unittest.TestCase):
         reads2 = os.path.join(data_dir, 'assembly_test_assemble_with_spades_reads_2.fq')
         ref_fasta = os.path.join(data_dir, 'assembly_test_assemble_with_spades_ref.fa')
         tmp_dir = 'tmp.test_scaffold_with_sspace'
-        a = assembly.Assembly(reads1, reads2, 'not needed', ref_fasta, tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout)
+        a = assembly.Assembly(reads1, reads2, 'not needed', ref_fasta, tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout, 'not needed')
         a.assembly_contigs = os.path.join(data_dir, 'assembly_test_scaffold_with_sspace_contigs.fa')
         a._scaffold_with_sspace()
         self.assertTrue(os.path.exists(a.scaffolder_scaffolds))
@@ -162,7 +162,7 @@ class TestAssembly(unittest.TestCase):
         reads1 = os.path.join(data_dir, 'assembly_test_gapfill_with_gapfiller_reads_1.fq')
         reads2 = os.path.join(data_dir, 'assembly_test_gapfill_with_gapfiller_reads_2.fq')
         tmp_dir = 'tmp.gap_fill_with_gapfiller_no_gaps'
-        a = assembly.Assembly(reads1, reads2, 'not needed', 'ref.fa', tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout)
+        a = assembly.Assembly(reads1, reads2, 'not needed', 'ref.fa', tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout, 'not needed')
         a.scaffolder_scaffolds = os.path.join(data_dir, 'assembly_test_gapfill_with_gapfiller.scaffolds_no_gaps.fa')
         a._gap_fill_with_gapfiller()
         self.assertTrue(os.path.exists(a.gapfilled_scaffolds))
@@ -175,7 +175,7 @@ class TestAssembly(unittest.TestCase):
         reads1 = os.path.join(data_dir, 'assembly_test_gapfill_with_gapfiller_reads_1.fq')
         reads2 = os.path.join(data_dir, 'assembly_test_gapfill_with_gapfiller_reads_2.fq')
         tmp_dir = 'tmp.gap_fill_with_gapfiller_with_gaps'
-        a = assembly.Assembly(reads1, reads2, 'not needed', 'ref.fa', tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout)
+        a = assembly.Assembly(reads1, reads2, 'not needed', 'ref.fa', tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout, 'not needed')
         a.scaffolder_scaffolds = os.path.join(data_dir, 'assembly_test_gapfill_with_gapfiller.scaffolds_with_gaps.fa')
         a._gap_fill_with_gapfiller()
         self.assertTrue(os.path.exists(a.gapfilled_scaffolds))
diff --git a/ariba/tests/cluster_test.py b/ariba/tests/cluster_test.py
index 10b59ea..05a7109 100644
--- a/ariba/tests/cluster_test.py
+++ b/ariba/tests/cluster_test.py
@@ -105,7 +105,7 @@ class TestCluster(unittest.TestCase):
         c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=0, total_reads_bases=0)
         c.run()
 
-        expected = '\t'.join(['.', '.', '.', '64', '0', 'cluster_name'] + ['.'] * 24)
+        expected = '\t'.join(['.', '.', '.', '.', '64', '0', 'cluster_name'] + ['.'] * 24)
         self.assertEqual([expected], c.report_lines)
         self.assertFalse(c.status_flag.has('ref_seq_choose_fail'))
         self.assertTrue(c.status_flag.has('assembly_fail'))
@@ -123,7 +123,26 @@ class TestCluster(unittest.TestCase):
         c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=2, total_reads_bases=108, spades_other_options='--only-assembler')
         c.run()
 
-        expected = '\t'.join(['.', '.', '.', '1024', '2', 'cluster_name'] + ['.'] * 24)
+        expected = '\t'.join(['.', '.', '.', '.', '1024', '2', 'cluster_name'] + ['.'] * 24)
+        self.assertEqual([expected], c.report_lines)
+        self.assertTrue(c.status_flag.has('ref_seq_choose_fail'))
+        self.assertFalse(c.status_flag.has('assembly_fail'))
+        shutil.rmtree(tmpdir)
+
+
+    def test_full_run_ref_not_in_cluster(self):
+        '''test complete run of cluster when nearest ref is outside cluster'''
+        fasta_in = os.path.join(data_dir, 'cluster_test_full_run_ref_not_in_cluster.in.fa')
+        tsv_in = os.path.join(data_dir, 'cluster_test_full_run_ref_not_in_cluster.in.tsv')
+        refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
+        tmpdir = 'tmp.test_full_run_ref_not_in_cluster'
+        ref_for_mash =  os.path.join(data_dir, 'cluster_test_full_run_ref_not_in_cluster.mash.fa')
+        shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_ref_not_in_cluster'), tmpdir)
+
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=72, total_reads_bases=3600, refdata_seqs_fasta_for_mash=ref_for_mash)
+        c.run()
+
+        expected = '\t'.join(['.', '.', '.', '.', '1024', '72', 'cluster_name'] + ['.'] * 24)
         self.assertEqual([expected], c.report_lines)
         self.assertTrue(c.status_flag.has('ref_seq_choose_fail'))
         self.assertFalse(c.status_flag.has('assembly_fail'))
@@ -141,7 +160,7 @@ class TestCluster(unittest.TestCase):
         c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=4, total_reads_bases=304)
         c.run()
 
-        expected = '\t'.join(['.', '.', '.', '64', '4', 'cluster_name'] + ['.'] * 24)
+        expected = '\t'.join(['.', '.', '.', '.', '64', '4', 'cluster_name'] + ['.'] * 24)
         self.assertEqual([expected], c.report_lines)
         self.assertFalse(c.status_flag.has('ref_seq_choose_fail'))
         self.assertTrue(c.status_flag.has('assembly_fail'))
@@ -159,13 +178,14 @@ class TestCluster(unittest.TestCase):
         c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=72, total_reads_bases=3600)
         c.run()
 
+        self.maxDiff=None
         expected = [
-            'noncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t14\t14\tA\t74\t74\tT\t19\t.\t19\tnoncoding1:0:0:A14T:.:ref has wild type, reads has variant so should report\tgeneric description of noncoding1',
-            'noncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t0\t.\tn\t.\t0\tG61T\tSNP\t61\t61\tG\t121\t121\tT\t24\t.\t24\t.\tgeneric description of noncoding1',
-            'noncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t0\t.\tn\t.\t0\t.82C\tINS\t82\t82\t.\t143\t143\tC\t23\t.\t23\t.\tgeneric description of noncoding1',
-            'noncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t0\t.\tn\t.\t0\tT108.\tDEL\t108\t108\tT\t168\t168\t.\t17\t.\t17\t.\tgeneric description of noncoding1',
-            'noncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t1\tSNP\tn\tA6G\t1\t.\t.\t6\t6\tG\t66\t66\tG\t19\t.\t19\tnoncoding1:0:0:A6G:.:variant in ref and reads so should report\tgeneric description of noncoding1',
-            'noncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t1\tSNP\tn\tG9T\t0\t.\t.\t9\t9\tG\t69\t69\tG\t19\t.\t19\tnoncoding1:0:0:G9T:.:wild type in ref and reads\tgeneric description of noncoding1'
+            'noncoding1\tnoncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t14\t14\tA\t74\t74\tT\t19\tT\t19\tnoncoding1:0:0:A14T:.:ref has wild type, reads has variant so should report\tgeneric description of noncoding1',
+            'noncoding1\tnoncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t0\t.\tn\t.\t0\tG61T\tSNP\t61\t61\tG\t121\t121\tT\t24\tT\t24\t.\tgeneric description of noncoding1',
+            'noncoding1\tnoncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t0\t.\tn\t.\t0\t.82C\tINS\t82\t82\tA\t143\t143\tC\t23\tC\t23\t.\tgeneric description of noncoding1',
+            'noncoding1\tnoncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t0\t.\tn\t.\t0\tT108.\tDEL\t108\t108\tT\t168\t168\tC\t17\tC\t17\t.\tgeneric description of noncoding1',
+            'noncoding1\tnoncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t1\tSNP\tn\tA6G\t1\t.\t.\t6\t6\tG\t66\t66\tG\t19\tG\t19\tnoncoding1:0:0:A6G:.:variant in ref and reads so should report\tgeneric description of noncoding1',
+            'noncoding1\tnoncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t1\tSNP\tn\tG9T\t0\t.\t.\t9\t9\tG\t69\t69\tG\t19\tG\t19\tnoncoding1:0:0:G9T:.:wild type in ref and reads\tgeneric description of noncoding1'
         ]
 
         self.assertEqual(expected, c.report_lines)
@@ -184,12 +204,10 @@ class TestCluster(unittest.TestCase):
         c.run()
 
         expected = [
-            'presence_absence1\t1\t0\t539\t64\tcluster_name\t96\t96\t97.92\tcluster_name.scaffold.1\t213\t15.0\t1\tSNP\tp\tA10V\t1\tA10V\tNONSYN\t29\t29\tC\t84\t84\tT\t22\t.\t22\tpresence_absence1:1:0:A10V:.:Ref has wild, reads have variant so report\tGeneric description of presence_absence1',
-            'presence_absence1\t1\t0\t539\t64\tcluster_name\t96\t96\t97.92\tcluster_name.scaffold.1\t213\t15.0\t0\t.\tp\t.\t0\t.\tSYN\t54\t54\tT\t109\t109\tC\t32\t.\t32\t.\tGeneric description of presence_absence1',
-
-            'presence_absence1\t1\t0\t539\t64\tcluster_name\t96\t96\t97.92\tcluster_name.scaffold.1\t213\t15.0\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tC;G;C\t62\t64\tC;G;C\t18;17;17\t.;.;.\t18;17;17\tpresence_absence1:1:0:R3S:.:Ref and assembly have wild type\tGeneric description of presence_absence1',
-
-            'presence_absence1\t1\t0\t539\t64\tcluster_name\t96\t96\t97.92\tcluster_name.scaffold.1\t213\t15.0\t1\tSNP\tp\tI5A\t1\t.\t.\t13\t15\tG;C;G\t68\t70\tG;C;G\t18;20;20\t.;.;.\t18;20;20\tpresence_absence1:1:0:I5A:.:Ref and reads have variant so report\tGeneric description of presence_absence1',
+            'presence_absence1\tpresence_absence1\t1\t0\t539\t64\tcluster_name\t96\t96\t97.92\tcluster_name.scaffold.1\t213\t15.0\t1\tSNP\tp\tA10V\t1\tA10V\tNONSYN\t28\t30\tGCG\t83\t85\tGTG\t22;22;21\tG;T;G\t22;22;21\tpresence_absence1:1:0:A10V:.:Ref has wild, reads have variant so report\tGeneric description of presence_absence1',
+            'presence_absence1\tpresence_absence1\t1\t0\t539\t64\tcluster_name\t96\t96\t97.92\tcluster_name.scaffold.1\t213\t15.0\t0\t.\tp\t.\t0\t.\tSYN\t52\t54\tATT\t107\t109\tATC\t31;31;32\tA;T;C\t31;31;32\t.\tGeneric description of presence_absence1',
+            'presence_absence1\tpresence_absence1\t1\t0\t539\t64\tcluster_name\t96\t96\t97.92\tcluster_name.scaffold.1\t213\t15.0\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tCGC\t62\t64\tCGC\t18;17;17\tC;G;C\t18;17;17\tpresence_absence1:1:0:R3S:.:Ref and assembly have wild type\tGeneric description of presence_absence1',
+            'presence_absence1\tpresence_absence1\t1\t0\t539\t64\tcluster_name\t96\t96\t97.92\tcluster_name.scaffold.1\t213\t15.0\t1\tSNP\tp\tI5A\t1\t.\t.\t13\t15\tGCG\t68\t70\tGCG\t18;20;20\tG;C;G\t18;20;20\tpresence_absence1:1:0:I5A:.:Ref and reads have variant so report\tGeneric description of presence_absence1',
         ]
 
         self.assertEqual(expected, c.report_lines)
@@ -207,7 +225,7 @@ class TestCluster(unittest.TestCase):
         c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=66, total_reads_bases=3300)
         c.run()
         expected = [
-            'variants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t215\t15.3\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tC;G;C\t65\t67\tC;G;C\t18;18;19\t.;.;.\t18;18;19\tvariants_only1:1:1:R3S:.:Ref and assembly have wild type, so do not report\tGeneric description of variants_only1'
+            'variants_only1\tvariants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t215\t15.3\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tCGC\t65\t67\tCGC\t18;18;19\tC;G;C\t18;18;19\tvariants_only1:1:1:R3S:.:Ref and assembly have wild type, so do not report\tGeneric description of variants_only1'
         ]
         self.assertEqual(expected, c.report_lines)
         shutil.rmtree(tmpdir)
@@ -224,7 +242,7 @@ class TestCluster(unittest.TestCase):
         c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=66, total_reads_bases=3300)
         c.run()
         expected = [
-            'variants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t215\t15.3\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tC;G;C\t65\t67\tC;G;C\t18;18;19\t.;.;.\t18;18;19\tvariants_only1:1:1:R3S:.:Ref and assembly have wild type, but always report anyway\tGeneric description of variants_only1'
+            'variants_only1\tvariants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t215\t15.3\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tCGC\t65\t67\tCGC\t18;18;19\tC;G;C\t18;18;19\tvariants_only1:1:1:R3S:.:Ref and assembly have wild type, but always report anyway\tGeneric description of variants_only1'
         ]
         self.assertEqual(expected, c.report_lines)
         shutil.rmtree(tmpdir)
@@ -242,8 +260,8 @@ class TestCluster(unittest.TestCase):
         c.run()
 
         expected = [
-            'variants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t215\t15.3\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tC;G;C\t65\t67\tC;G;C\t18;18;19\t.;.;.\t18;18;19\tvariants_only1:1:1:R3S:.:Ref and assembly have wild type\tGeneric description of variants_only1',
-            'variants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t215\t15.3\t1\tSNP\tp\tI5A\t1\t.\t.\t13\t15\tG;C;G\t71\t73\tG;C;G\t17;17;17\t.;.;.\t17;17;17\tvariants_only1:1:1:I5A:.:Ref and reads have variant so report\tGeneric description of variants_only1',
+            'variants_only1\tvariants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t215\t15.3\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tCGC\t65\t67\tCGC\t18;18;19\tC;G;C\t18;18;19\tvariants_only1:1:1:R3S:.:Ref and assembly have wild type\tGeneric description of variants_only1',
+            'variants_only1\tvariants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t215\t15.3\t1\tSNP\tp\tI5A\t1\t.\t.\t13\t15\tGCG\t71\t73\tGCG\t17;17;17\tG;C;G\t17;17;17\tvariants_only1:1:1:I5A:.:Ref and reads have variant so report\tGeneric description of variants_only1',
         ]
         self.assertEqual(expected, c.report_lines)
         shutil.rmtree(tmpdir)
@@ -259,7 +277,7 @@ class TestCluster(unittest.TestCase):
         c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=112, total_reads_bases=1080)
         c.run()
         expected = [
-            'gene\t1\t0\t27\t112\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t364\t27.0\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of gene'
+            'gene\tgene\t1\t0\t27\t112\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t364\t27.0\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of gene'
         ]
         self.assertEqual(expected, c.report_lines)
         shutil.rmtree(tmpdir)
@@ -275,7 +293,7 @@ class TestCluster(unittest.TestCase):
         c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
         c.run()
         expected = [
-            'ref_gene\t1\t0\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tA\t32,31\t.\tGeneric description of ref_gene'
+            'ref_gene\tref_gene\t1\t0\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\t.\tGeneric description of ref_gene'
         ]
         self.assertEqual(expected, c.report_lines)
         shutil.rmtree(tmpdir)
@@ -293,7 +311,7 @@ class TestCluster(unittest.TestCase):
         c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
         c.run()
         expected = [
-            'ref_gene\t1\t1\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tA\t32,31\t.\tGeneric description of ref_gene'
+            'ref_gene\tref_gene\t1\t1\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\t.\tGeneric description of ref_gene'
         ]
         self.assertEqual(expected, c.report_lines)
         shutil.rmtree(tmpdir)
@@ -312,7 +330,7 @@ class TestCluster(unittest.TestCase):
         # We shouldn't get an extra 'HET' line because we already know about the snp, so
         # included in the report of the known snp
         expected = [
-            'ref_gene\t1\t0\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tp\tM6I\t0\t.\t.\t16\t18\tA;T;G\t135\t137\tA;T;G\t65;64;63\t.;.;A\t65;64;32,31\tref_gene:1:0:M6I:.:Description of M6I snp\t.'
+            'ref_gene\tref_gene\t1\t0\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tp\tM6I\t0\t.\t.\t16\t18\tATG\t135\t137\tATG\t65;64;63\tA;T;G,A\t65;64;32,31\tref_gene:1:0:M6I:.:Description of M6I snp\t.'
         ]
         self.assertEqual(expected, c.report_lines)
         shutil.rmtree(tmpdir)
@@ -331,7 +349,7 @@ class TestCluster(unittest.TestCase):
         # We shouldn't get an extra 'HET' line because we already know about the snp, so
         # included in the report of the known snp
         expected = [
-            'ref_gene\t1\t1\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tp\tM6I\t0\t.\t.\t16\t18\tA;T;G\t135\t137\tA;T;G\t65;64;63\t.;.;A\t65;64;32,31\tref_gene:1:1:M6I:.:Description of M6I snp\t.'
+            'ref_gene\tref_gene\t1\t1\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tp\tM6I\t0\t.\t.\t16\t18\tATG\t135\t137\tATG\t65;64;63\tA;T;G,A\t65;64;32,31\tref_gene:1:1:M6I:.:Description of M6I snp\t.'
         ]
         self.assertEqual(expected, c.report_lines)
         shutil.rmtree(tmpdir)
@@ -350,7 +368,7 @@ class TestCluster(unittest.TestCase):
         # We shouldn't get an extra 'HET' line because we already know about the snp, so
         # included in the report of the known snp
         expected = [
-            'ref_gene\t1\t1\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tp\tI6M\t1\t.\t.\t16\t18\tA;T;G\t135\t137\tA;T;G\t65;64;63\t.;.;A\t65;64;32,31\tref_gene:1:1:I6M:.:Description of I6M snp\t.'
+            'ref_gene\tref_gene\t1\t1\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tp\tI6M\t1\t.\t.\t16\t18\tATG\t135\t137\tATG\t65;64;63\tA;T;G,A\t65;64;32,31\tref_gene:1:1:I6M:.:Description of I6M snp\t.'
         ]
         self.assertEqual(expected, c.report_lines)
         shutil.rmtree(tmpdir)
@@ -366,7 +384,7 @@ class TestCluster(unittest.TestCase):
         c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
         c.run()
         expected = [
-            'ref_seq\t0\t0\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tA\t32,31\t.\tGeneric description of ref_seq'
+            'ref_seq\tref_seq\t0\t0\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\t.\tGeneric description of ref_seq'
         ]
         self.assertEqual(expected, c.report_lines)
         shutil.rmtree(tmpdir)
@@ -382,7 +400,7 @@ class TestCluster(unittest.TestCase):
         c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
         c.run()
         expected = [
-            'ref_seq\t0\t0\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tn\tG18A\t0\t.\t.\t18\t18\tG\t137\t137\tG\t63\tA\t32,31\tref_seq:0:0:G18A:.:Description of G18A\tGeneric description of ref_seq'
+            'ref_seq\tref_seq\t0\t0\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tn\tG18A\t0\t.\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\tref_seq:0:0:G18A:.:Description of G18A\tGeneric description of ref_seq'
         ]
         self.assertEqual(expected, c.report_lines)
         shutil.rmtree(tmpdir)
@@ -398,7 +416,7 @@ class TestCluster(unittest.TestCase):
         c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
         c.run()
         expected = [
-            'ref_seq\t0\t1\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tA\t32,31\t.\tGeneric description of ref_seq'
+            'ref_seq\tref_seq\t0\t1\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\t.\tGeneric description of ref_seq'
         ]
         self.assertEqual(expected, c.report_lines)
         shutil.rmtree(tmpdir)
@@ -417,7 +435,7 @@ class TestCluster(unittest.TestCase):
         # We shouldn't get an extra 'HET' line because we already know about the snp, so
         # included in the report of the known snp
         expected = [
-            'ref_seq\t0\t0\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tn\tG18A\t0\t.\t.\t18\t18\tG\t137\t137\tG\t63\tA\t32,31\tref_seq:0:0:G18A:.:Description of G18A snp\t.'
+            'ref_seq\tref_seq\t0\t0\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tn\tG18A\t0\t.\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\tref_seq:0:0:G18A:.:Description of G18A snp\t.'
         ]
         self.assertEqual(expected, c.report_lines)
         shutil.rmtree(tmpdir)
@@ -436,7 +454,7 @@ class TestCluster(unittest.TestCase):
         # We shouldn't get an extra 'HET' line because we already know about the snp, so
         # included in the report of the known snp
         expected = [
-            'ref_seq\t0\t1\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tn\tG18A\t0\t.\t.\t18\t18\tG\t137\t137\tG\t63\tA\t32,31\tref_seq:0:1:G18A:.:Description of G18A snp\t.'
+            'ref_seq\tref_seq\t0\t1\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tn\tG18A\t0\t.\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\tref_seq:0:1:G18A:.:Description of G18A snp\t.'
         ]
         self.assertEqual(expected, c.report_lines)
         shutil.rmtree(tmpdir)
@@ -455,7 +473,7 @@ class TestCluster(unittest.TestCase):
         # We shouldn't get an extra 'HET' line because we already know about the snp, so
         # included in the report of the known snp
         expected = [
-            'ref_seq\t0\t1\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tn\tA18G\t1\t.\t.\t18\t18\tG\t137\t137\tG\t63\tA\t32,31\tref_seq:0:1:A18G:.:Description of A18G snp\t.'
+            'ref_seq\tref_seq\t0\t1\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tn\tA18G\t1\t.\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\tref_seq:0:1:A18G:.:Description of A18G snp\t.'
         ]
         self.assertEqual(expected, c.report_lines)
         shutil.rmtree(tmpdir)
@@ -472,8 +490,59 @@ class TestCluster(unittest.TestCase):
         c.run()
 
         expected = [
-            'presence_absence1\t1\t0\t19\t278\tcluster_name\t96\t77\t100.0\tcluster_name.scaffold.1\t949\t20.5\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of presence_absence1'
+            'presence_absence1\tpresence_absence1\t1\t0\t19\t278\tcluster_name\t96\t77\t100.0\tcluster_name.scaffold.1\t949\t20.5\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of presence_absence1'
+        ]
+        self.assertEqual(expected, c.report_lines)
+        shutil.rmtree(tmpdir)
+
+
+    def test_full_run_multiple_vars_in_codon(self):
+        '''Test complete run where there is a codon with a SNP and an indel'''
+        fasta_in = os.path.join(data_dir, 'cluster_test_full_run_multiple_vars.fa')
+        tsv_in = os.path.join(data_dir, 'cluster_test_full_run_multiple_vars.tsv')
+        refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
+        tmpdir = 'tmp.cluster_test_full_run_multiple_vars'
+        shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_multiple_vars'), tmpdir)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=292, total_reads_bases=20900)
+        c.run()
+
+        expected = [
+            'presence_absence1\tpresence_absence1\t1\t0\t539\t292\tcluster_name\t96\t96\t96.91\tcluster_name.scaffold.1\t1074\t20.4\t0\t.\tp\t.\t0\t.\tMULTIPLE\t25\t26\tGA\t487\t489\tCAT\t27;26;25\tC;A;T\t27;26;25\t.\tGeneric description of presence_absence1',
+            'presence_absence1\tpresence_absence1\t1\t0\t539\t292\tcluster_name\t96\t96\t96.91\tcluster_name.scaffold.1\t1074\t20.4\t0\t.\tp\t.\t0\tA10fs\tFSHIFT\t28\t28\tG\t491\t491\tG\t26\tG\t26\t.\tGeneric description of presence_absence1',
         ]
         self.assertEqual(expected, c.report_lines)
         shutil.rmtree(tmpdir)
 
+
+    def test_full_run_delete_codon(self):
+        '''Test complete run where there is a deleted codon'''
+        fasta_in = os.path.join(data_dir, 'cluster_test_full_run_delete_codon.fa')
+        tsv_in = os.path.join(data_dir, 'cluster_test_full_run_delete_codon.tsv')
+        refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
+        tmpdir = 'tmp.cluster_test_full_delete_codon'
+        shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_delete_codon'), tmpdir)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=292, total_reads_bases=20900)
+        c.run()
+
+        expected = [
+            'presence_absence1\tpresence_absence1\t1\t0\t539\t292\tcluster_name\t117\t117\t92.31\tcluster_name.scaffold.1\t1104\t20.0\t0\t.\tp\t.\t0\tR25_A26del\tDEL\t73\t73\tA\t553\t553\tA\t27\tA\t27\t.\tGeneric description of presence_absence1',
+        ]
+        self.assertEqual(expected, c.report_lines)
+        shutil.rmtree(tmpdir)
+
+
+    def test_full_run_insert_codon(self):
+        '''Test complete run where there is a inserted codon'''
+        fasta_in = os.path.join(data_dir, 'cluster_test_full_run_insert_codon.fa')
+        tsv_in = os.path.join(data_dir, 'cluster_test_full_run_insert_codon.tsv')
+        refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
+        tmpdir = 'tmp.cluster_test_full_insert_codon'
+        shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_insert_codon'), tmpdir)
+        c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=292, total_reads_bases=20900)
+        c.run()
+
+        expected = [
+            'presence_absence1\tpresence_absence1\t1\t0\t539\t292\tcluster_name\t108\t108\t92.31\tcluster_name.scaffold.1\t1115\t19.9\t0\t.\tp\t.\t0\tS25_M26insELI\tINS\t73\t73\tA\t554\t554\tG\t24\tG\t24\t.\tGeneric description of presence_absence1'
+        ]
+        self.assertEqual(expected, c.report_lines)
+        shutil.rmtree(tmpdir)
diff --git a/ariba/tests/clusters_test.py b/ariba/tests/clusters_test.py
index a107003..1056129 100644
--- a/ariba/tests/clusters_test.py
+++ b/ariba/tests/clusters_test.py
@@ -24,6 +24,7 @@ class TestClusters(unittest.TestCase):
         self.refdata_dir = 'tmp.RefData'
         os.mkdir(self.refdata_dir)
         shutil.copyfile(os.path.join(data_dir, 'clusters_test_dummy_db.fa'), os.path.join(self.refdata_dir, '02.cdhit.all.fa'))
+        shutil.copyfile(os.path.join(data_dir, 'clusters_test_dummy_db.fa.msh'), os.path.join(self.refdata_dir, '02.cdhit.all.fa.msh'))
         shutil.copyfile(os.path.join(data_dir, 'clusters_test_dummy_db.tsv'), os.path.join(self.refdata_dir, '01.filter.check_metadata.tsv'))
         with open(os.path.join(self.refdata_dir, '00.info.txt'), 'w') as f:
             print('genetic_code\t11', file=f)
@@ -163,6 +164,16 @@ class TestClusters(unittest.TestCase):
         bin_size = 10
         inprefix = os.path.join(data_dir, 'clusters_test_load_minimap_files')
         got_clster2rep, got_cluster_read_count, got_cluster_base_count, got_insert_hist, got_proper_pairs = clusters.Clusters._load_minimap_files(inprefix, bin_size)
+        expected_clster2rep = {'1': 'ref2', '2': 'ref42'}
+        expected_cluster_read_count = {'1': 42, '2': 43}
+        expected_cluster_base_count = {'1': 4242, '2': 4343}
+        expected_insert_hist_bins = {80: 3, 90: 20, 100: 7, 110: 3}
+        expected_proper_pairs = 42424242
+        self.assertEqual(expected_clster2rep, got_clster2rep)
+        self.assertEqual(expected_cluster_read_count, got_cluster_read_count)
+        self.assertEqual(expected_cluster_base_count, got_cluster_base_count)
+        self.assertEqual(expected_insert_hist_bins, got_insert_hist.bins)
+        self.assertEqual(expected_proper_pairs, got_proper_pairs)
 
 
     def test_set_insert_size_data(self):
diff --git a/ariba/tests/data/cluster_test_full_run_delete_codon.fa b/ariba/tests/data/cluster_test_full_run_delete_codon.fa
new file mode 100644
index 0000000..021f2f2
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_delete_codon.fa
@@ -0,0 +1,3 @@
+>presence_absence1
+ATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACC
+TACGGTGATCAAGTTTATAGAGCTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAA
diff --git a/ariba/tests/data/cluster_test_full_run_delete_codon.tsv b/ariba/tests/data/cluster_test_full_run_delete_codon.tsv
new file mode 100644
index 0000000..da35140
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_delete_codon.tsv
@@ -0,0 +1 @@
+presence_absence1	1	0	.	.	Generic description of presence_absence1
diff --git a/ariba/tests/data/cluster_test_full_run_delete_codon/for_reads.fa b/ariba/tests/data/cluster_test_full_run_delete_codon/for_reads.fa
new file mode 100644
index 0000000..20af6ce
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_delete_codon/for_reads.fa
@@ -0,0 +1,20 @@
+>presence_absence1
+CTTAATTATATATCCAGTACATCAACGGGGGGACGCTACGTGGTATAGCCGGATTACAGC
+ACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGT
+GTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCT
+GGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCA
+AACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAA
+TTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCA
+TGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAG
+GGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGA
+ATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACC
+TACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAA
+TTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGT
+TCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCAT
+CAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTA
+GAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGA
+TTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGA
+GTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTT
+TCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATCGGTCT
+AGTCTCTGGCACGTCTGACGACTCAATCTCGAGATCGTACACGGCGCTGTAGAGCCATGT
+AATCCCTCTGTTAAAGATCTGGCGACCTTAACAATAGAAA
diff --git a/ariba/tests/data/cluster_test_full_run_delete_codon/reads_1.fq b/ariba/tests/data/cluster_test_full_run_delete_codon/reads_1.fq
new file mode 100644
index 0000000..2269d0b
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_delete_codon/reads_1.fq
@@ -0,0 +1,588 @@
+ at presence_absence1:1:493:567/1
+GAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGTATGCCCAGTCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:2:556:631/1
+ATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:3:707:783/1
+ATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:4:32:108/1
+GACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:5:925:999/1
+ACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:6:700:774/1
+ATTTATCATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:7:770:844/1
+AAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:8:185:261/1
+CCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:9:871:946/1
+AGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:10:484:560/1
+GCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGTATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:11:629:704/1
+CACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:12:810:884/1
+GGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:13:277:351/1
+TTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:14:221:295/1
+ACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:15:778:852/1
+TCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:16:611:685/1
+GGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:17:706:780/1
+CATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:18:25:99/1
+ACGGGGGGACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:19:70:144/1
+GACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:20:409:482/1
+CGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:21:767:841/1
+TAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:22:560:635/1
+CCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:23:83:157/1
+GATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:24:530:606/1
+GCACCTACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:25:485:561/1
+CGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGTATGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:26:409:483/1
+CGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:27:553:627/1
+AGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:28:529:599/1
+AGCACCTACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:29:79:155/1
+CAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:30:749:822/1
+CTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:31:882:956/1
+CACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:32:82:157/1
+TGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:33:578:652/1
+AACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:34:299:373/1
+AATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:35:267:342/1
+AGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:36:80:153/1
+AGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:37:320:394/1
+GCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:38:616:689/1
+ACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:39:707:781/1
+ATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:40:30:104/1
+GGGACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:41:249:324/1
+GTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:42:264:337/1
+GTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:43:441:515/1
+CAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:44:553:628/1
+AGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:45:641:716/1
+GCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:46:385:460/1
+CCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:47:46:121/1
+TAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:48:609:683/1
+CAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:49:169:243/1
+AACACTAAACCTGGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:50:601:677/1
+ATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:51:295:372/1
+TGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:52:347:422/1
+GAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:53:382:456/1
+ACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:54:92:167/1
+ATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:55:654:729/1
+CTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATTCTCTTATTTGATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:56:524:598/1
+AGCGCAGCACCTACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:57:742:816/1
+ATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:58:178:253/1
+CCTGGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:59:755:828/1
+TCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:60:200:274/1
+GACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:61:425:499/1
+TAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:62:470:546/1
+GGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:63:198:273/1
+CCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:64:692:766/1
+ATATACAAATTTATCATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:65:543:616/1
+TCAAGTTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:66:161:235/1
+TCCTCCTAAACACTAAACCTGGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:67:908:984/1
+TTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:68:729:805/1
+CGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:69:798:873/1
+CGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:70:760:835/1
+CTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:71:135:210/1
+CTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTGGTTCCGCTAAAATGAACCGACTATAATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:72:443:519/1
+GGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:73:1:75/1
+CTTAATTATATATCCAGTACATCAACGGGGGGACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:74:400:474/1
+CGCAGTACACGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:75:44:118/1
+TATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:76:865:937/1
+TCTACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:77:702:776/1
+TTATCATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:78:199:272/1
+CGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:79:58:132/1
+AGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:80:772:848/1
+GTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:81:748:823/1
+ACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:82:510:583/1
+ACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:83:862:935/1
+TAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:84:324:399/1
+CACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:85:489:563/1
+CGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGTATGCCCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:86:637:711/1
+ATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:87:451:526/1
+AGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:88:713:787/1
+ATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:89:597:671/1
+GATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:90:246:318/1
+CTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:91:658:733/1
+TTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATTCTCTTATTTGATGCGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:92:624:700/1
+CAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:93:275:349/1
+GCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:94:497:572/1
+TTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:95:204:277/1
+ATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:96:427:502/1
+GGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:97:550:624/1
+TATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:98:188:262/1
+CTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:99:953:1030/1
+CAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATCGGTCTAGTCTCTGGCACGTCTGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:100:924:1000/1
+GACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:101:236:310/1
+GGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:102:67:141/1
+CCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:103:750:825/1
+TAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:104:746:822/1
+GTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:105:345:419/1
+TCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:106:225:300/1
+TAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:107:254:329/1
+ATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:108:783:856/1
+ATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:109:214:290/1
+TAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:110:499:575/1
+GGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:111:439:513/1
+CCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:112:452:525/1
+GCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:113:196:271/1
+AACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:114:68:142/1
+CTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:115:879:956/1
+GCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:116:211:286/1
+TGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:117:570:645/1
+TGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:118:465:539/1
+AAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:119:701:776/1
+TTTATCATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:120:715:789/1
+TCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:121:124:198/1
+CACGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTGGTTCCGCTAAAATGAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:122:641:715/1
+GCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:123:368:441/1
+TTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:124:654:728/1
+CTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATTCTCTTATTTGATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:125:901:976/1
+TAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:126:178:252/1
+CCTGGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:127:825:900/1
+TAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:128:10:85/1
+ATATCCAGTACATCAACGGGGGGACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:129:116:189/1
+GACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTGGTTCCGCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:130:145:221/1
+CTCACGATCATCCATGTCCTCCTAAACACTAAACCTGGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:131:526:602/1
+CGCAGCACCTACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:132:934:1009/1
+GTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATCGGTCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:133:893:969/1
+CTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:134:774:849/1
+TGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:135:763:836/1
+AGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:136:469:544/1
+AGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:137:905:977/1
+CCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:138:108:183/1
+CTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:139:782:855/1
+CATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:140:811:886/1
+GTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:141:383:457/1
+CACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:142:331:405/1
+CAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:143:719:794/1
+TTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:144:847:921/1
+GTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:145:330:404/1
+GCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:146:757:831/1
+AGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:147:899:974/1
+CGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/tests/data/cluster_test_full_run_delete_codon/reads_2.fq b/ariba/tests/data/cluster_test_full_run_delete_codon/reads_2.fq
new file mode 100644
index 0000000..487bff5
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_delete_codon/reads_2.fq
@@ -0,0 +1,588 @@
+ at presence_absence1:1:493:567/2
+CCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:2:556:631/2
+ATAAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:3:707:783/2
+GAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:4:32:108/2
+CCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:5:925:999/2
+GGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:6:700:774/2
+ACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGTAGATACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:7:770:844/2
+CATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:8:185:261/2
+TATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:9:871:946/2
+GTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:10:484:560/2
+TCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:11:629:704/2
+ATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:12:810:884/2
+CCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:13:277:351/2
+ACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:14:221:295/2
+AAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:15:778:852/2
+GTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:16:611:685/2
+TCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:17:706:780/2
+CAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:18:25:99/2
+GTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCTAGAGCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:19:70:144/2
+AATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:20:409:482/2
+TACTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGCACGCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:21:767:841/2
+CAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:22:560:635/2
+GATGATAAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:23:83:157/2
+AGGGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:24:530:606/2
+CCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:25:485:561/2
+GTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:26:409:483/2
+ATACTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGCACGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:27:553:627/2
+ATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:28:529:599/2
+AGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:29:79:155/2
+GGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:30:749:822/2
+ACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:31:882:956/2
+GTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:32:82:157/2
+AGGGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:33:578:652/2
+TCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:34:299:373/2
+CAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:35:267:342/2
+TGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:36:80:153/2
+TTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:37:320:394/2
+GTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:38:616:689/2
+TCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:39:707:781/2
+GCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:40:30:104/2
+GTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:41:249:324/2
+CGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:42:264:337/2
+TCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:43:441:515/2
+ATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:44:553:628/2
+AATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:45:641:716/2
+ATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:46:385:460/2
+GGTGCTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:47:46:121/2
+CATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:48:609:683/2
+TGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:49:169:243/2
+TATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATACCACCAGTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:50:601:677/2
+TAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:51:295:372/2
+AACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:52:347:422/2
+ATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:53:382:456/2
+CTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:54:92:167/2
+TTGGCCATCTAGGGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:55:654:729/2
+ATTGCGTACCTTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:56:524:598/2
+GTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:57:742:816/2
+ACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:58:178:253/2
+TGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:59:755:828/2
+TACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:60:200:274/2
+TCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:61:425:499/2
+AACAAACAGACTGGGCATACTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATATGGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:62:470:546/2
+AGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:63:198:273/2
+CGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:64:692:766/2
+GTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGTAGATACAACTTCTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:65:543:616/2
+CGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:66:161:235/2
+TCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATACCACCAGTTGTTTGGCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:67:908:984/2
+ACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:68:729:805/2
+CGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:69:798:873/2
+AACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:70:760:835/2
+AATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:71:135:210/2
+CTGCCAAAGCCGGCTGCTCACAAATGGGAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:72:443:519/2
+ATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:73:1:75/2
+GTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCTAGAGCCAAGAGGATTAGTGAATCACTGATTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:74:400:474/2
+ACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGCACGCCATTCTGCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:75:44:118/2
+TTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:76:865:937/2
+ACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:77:702:776/2
+ATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGTAGATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:78:199:272/2
+GAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:79:58:132/2
+TATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:80:772:848/2
+GTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:81:748:823/2
+CACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:82:510:583/2
+ATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:83:862:935/2
+TAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:84:324:399/2
+GGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:85:489:563/2
+TGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:86:637:711/2
+AATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:87:451:526/2
+AATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAACTTGATCACCGTAGGTGCTGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:88:713:787/2
+GCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:89:597:671/2
+CGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCCGCGGGAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:90:246:318/2
+GCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:91:658:733/2
+ACAGATTGCGTACCTTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:92:624:700/2
+AACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:93:275:349/2
+ACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:94:497:572/2
+TCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:95:204:277/2
+TACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:96:427:502/2
+TTCAACAAACAGACTGGGCATACTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:97:550:624/2
+TGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:98:188:262/2
+TTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:99:953:1030/2
+TATTGTTAAGGTCGCCAGATCTTTAACAGAGGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:100:924:1000/2
+GGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:101:236:310/2
+TGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:102:67:141/2
+TAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:103:750:825/2
+GACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:104:746:822/2
+ACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:105:345:419/2
+CATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:106:225:300/2
+GGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:107:254:329/2
+TGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:108:783:856/2
+GTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:109:214:290/2
+GGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:110:499:575/2
+AACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:111:439:513/2
+TATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:112:452:525/2
+ATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAACTTGATCACCGTAGGTGCTGCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:113:196:271/2
+AATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:114:68:142/2
+TTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:115:879:956/2
+GTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:116:211:286/2
+TGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:117:570:645/2
+AGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:118:465:539/2
+CCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAACTTGATCAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:119:701:776/2
+ATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGTAGATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:120:715:789/2
+TAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:121:124:198/2
+GCTGCTCACAAATGGGAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACACTATTATAGTCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:122:641:715/2
+TTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:123:368:441/2
+ATCAGTTTAATATGGCCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:124:654:728/2
+TTGCGTACCTTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:125:901:976/2
+GTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:126:178:252/2
+GTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:127:825:900/2
+GAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:128:10:85/2
+ATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCTAGAGCCAAGAGGATTAGTGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:129:116:189/2
+AAATGGGAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:130:145:221/2
+ATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:131:526:602/2
+GGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:132:934:1009/2
+TTTAACAGAGGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:133:893:969/2
+ATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:134:774:849/2
+CGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:135:763:836/2
+CAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:136:469:544/2
+GGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAACTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:137:905:977/2
+CGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:138:108:183/2
+GAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:139:782:855/2
+TGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:140:811:886/2
+GACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:141:383:457/2
+GCTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:142:331:405/2
+CTGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:143:719:794/2
+TAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:144:847:921/2
+GGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:145:330:404/2
+TGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:146:757:831/2
+GATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:147:899:974/2
+GTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/tests/data/cluster_test_full_run_delete_codon/references.fa b/ariba/tests/data/cluster_test_full_run_delete_codon/references.fa
new file mode 100644
index 0000000..021f2f2
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_delete_codon/references.fa
@@ -0,0 +1,3 @@
+>presence_absence1
+ATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACC
+TACGGTGATCAAGTTTATAGAGCTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAA
diff --git a/ariba/tests/data/cluster_test_full_run_insert_codon.fa b/ariba/tests/data/cluster_test_full_run_insert_codon.fa
new file mode 100644
index 0000000..be61945
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_insert_codon.fa
@@ -0,0 +1,3 @@
+>presence_absence1
+ATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACC
+TACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAA
diff --git a/ariba/tests/data/cluster_test_full_run_insert_codon.tsv b/ariba/tests/data/cluster_test_full_run_insert_codon.tsv
new file mode 100644
index 0000000..da35140
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_insert_codon.tsv
@@ -0,0 +1 @@
+presence_absence1	1	0	.	.	Generic description of presence_absence1
diff --git a/ariba/tests/data/cluster_test_full_run_insert_codon/for_reads.fa b/ariba/tests/data/cluster_test_full_run_insert_codon/for_reads.fa
new file mode 100644
index 0000000..3a4a124
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_insert_codon/for_reads.fa
@@ -0,0 +1,20 @@
+>presence_absence1
+CTTAATTATATATCCAGTACATCAACGGGGGGACGCTACGTGGTATAGCCGGATTACAGC
+ACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGT
+GTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCT
+GGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCA
+AACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAA
+TTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCA
+TGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAG
+GGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGA
+ATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACC
+TACGGTGATCAAGTTTATAGAGCTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAA
+TTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGT
+TCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCAT
+CAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTA
+GAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGA
+TTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGA
+GTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTT
+TCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATCGGTCT
+AGTCTCTGGCACGTCTGACGACTCAATCTCGAGATCGTACACGGCGCTGTAGAGCCATGT
+AATCCCTCTGTTAAAGATCTGGCGACCTTAACAATAGAAA
diff --git a/ariba/tests/data/cluster_test_full_run_insert_codon/reads_1.fq b/ariba/tests/data/cluster_test_full_run_insert_codon/reads_1.fq
new file mode 100644
index 0000000..160a42e
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_insert_codon/reads_1.fq
@@ -0,0 +1,592 @@
+ at presence_absence1:1:79:154/1
+CAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:2:904:979/1
+GTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:3:869:942/1
+GCTAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:4:238:314/1
+CCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:5:202:276/1
+CTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:6:560:635/1
+ATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:7:321:394/1
+CGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:8:795:871/1
+ACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:9:489:563/1
+CGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGAGCTTATAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:10:789:865/1
+TACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:11:292:367/1
+CGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:12:381:457/1
+TACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:13:673:747/1
+GGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATTCTCTTATTTGATGCGGACTGGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:14:163:239/1
+CTCCTAAACACTAAACCTGGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:15:69:143/1
+TGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:16:812:888/1
+TCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:17:822:895/1
+ACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:18:61:138/1
+ACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:19:969:1041/1
+CAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATCGGTCTAGTCTCTGGCACGTCTGACGACTCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:20:880:955/1
+AGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:21:941:1015/1
+AAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATCGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:22:67:141/1
+CCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:23:470:546/1
+GGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:24:482:555/1
+TGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:25:714:790/1
+TCATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:26:857:932/1
+TATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:27:312:386/1
+ACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:28:26:99/1
+CGGGGGGACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:29:307:381/1
+AGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:30:587:661/1
+AACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:31:734:808/1
+GATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:32:657:730/1
+TTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATTCTCTTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:33:584:657/1
+AAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:34:239:314/1
+CAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:35:912:987/1
+ATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:36:352:426/1
+AAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:37:622:695/1
+GAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:38:514:587/1
+ATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGAGCTTATAGTATGCCCAGTCTGTTTGTTGAAAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:39:1:76/1
+CTTAATTATATATCCAGTACATCAACGGGGGGACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:40:406:481/1
+ACACGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:41:13:88/1
+TCCAGTACATCAACGGGGGGACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:42:246:321/1
+CTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:43:107:181/1
+GCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:44:425:500/1
+TAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:45:255:329/1
+TTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:46:126:201/1
+CGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTGGTTCCGCTAAAATGAACCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:47:458:532/1
+ACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:48:352:427/1
+AAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:49:303:378/1
+TACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:50:640:715/1
+CGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:51:598:672/1
+TTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:52:865:938/1
+TCACGCTAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:53:938:1012/1
+ACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:54:237:311/1
+GCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:55:626:701/1
+CCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:56:487:561/1
+TGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGAGCTTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:57:458:533/1
+ACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:58:360:434/1
+ATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGTAGGGTCAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:59:892:966/1
+ACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:60:327:402/1
+AACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:61:655:728/1
+AGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATTCTCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:62:944:1020/1
+TCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATCGGTCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:63:281:355/1
+GCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:64:876:950/1
+TACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:65:29:103/1
+GGGGACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:66:439:514/1
+CCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:67:843:916/1
+CCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:68:48:121/1
+GCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:69:641:717/1
+GGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:70:448:524/1
+ACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:71:472:545/1
+CCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:72:298:373/1
+GAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:73:84:158/1
+ATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:74:473:547/1
+CTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:75:115:190/1
+GGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTGGTTCCGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:76:803:877/1
+GGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:77:241:316/1
+AACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:78:138:213/1
+CTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTGGTTCCGCTAAAATGAACCGACTATAATAGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:79:845:918/1
+TAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:80:492:567/1
+TGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGAGCTTATAGTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:81:908:983/1
+CGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:82:624:699/1
+AACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:83:235:310/1
+TGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:84:823:896/1
+CCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:85:283:358/1
+AGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:86:255:331/1
+TTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:87:72:148/1
+CCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:88:615:688/1
+GTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:89:936:1009/1
+ACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:90:711:784/1
+TTATCATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:91:590:666/1
+TGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:92:747:820/1
+TGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:93:234:309/1
+ATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:94:325:399/1
+ACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:95:214:287/1
+TAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:96:155:230/1
+TCCATGTCCTCCTAAACACTAAACCTGGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:97:351:427/1
+AAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:98:734:809/1
+GATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:99:756:830/1
+TACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:100:266:341/1
+GAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:101:639:714/1
+ACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:102:278:353/1
+TTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:103:265:341/1
+TGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:104:394:468/1
+ACTCGGCGCAGTACACGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:105:918:993/1
+TGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:106:818:891/1
+GGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:107:745:818/1
+GGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:108:282:356/1
+CAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:109:942:1016/1
+AGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATCGGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:110:562:637/1
+AGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:111:778:854/1
+GAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:112:445:519/1
+TTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:113:366:440/1
+GCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGTAGGGTCAGCCCGTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:114:413:488/1
+TTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:115:564:638/1
+TATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:116:774:850/1
+CTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:117:101:175/1
+GCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:118:958:1033/1
+TCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATCGGTCTAGTCTCTGGCACGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:119:128:203/1
+CGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTGGTTCCGCTAAAATGAACCGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:120:860:934/1
+TTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:121:672:747/1
+CGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATTCTCTTATTTGATGCGGACTGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:122:676:750/1
+ACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATTCTCTTATTTGATGCGGACTGGGTGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:123:188:262/1
+CTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:124:186:260/1
+CGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:125:647:722/1
+TTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:126:743:818/1
+TGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:127:477:552/1
+CAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:128:793:868/1
+TTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:129:33:106/1
+ACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:130:187:261/1
+GCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:131:681:756/1
+CTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:132:899:973/1
+TGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:133:472:547/1
+CCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:134:36:111/1
+CTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:135:617:693/1
+GCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:136:832:905/1
+TTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:137:168:242/1
+AAACACTAAACCTGGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:138:428:503/1
+GGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:139:19:92/1
+ACATCAACGGGGGGACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:140:132:204/1
+AGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTGGTTCCGCTAAAATGAACCGACTATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:141:538:613/1
+GGTGATCAAGTTTATAGAGCTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:142:690:763/1
+CCTAGATACGGATATACAAATTTATCATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:143:439:514.dup.2/1
+CCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:144:592:666/1
+CAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:145:445:519.dup.2/1
+TTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:146:530:605/1
+GCACCTACGGTGATCAAGTTTATAGAGCTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:147:521:595/1
+CGCAGCGCAGCACCTACGGTGATCAAGTTTATAGAGCTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:148:178:250/1
+CCTGGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/tests/data/cluster_test_full_run_insert_codon/reads_2.fq b/ariba/tests/data/cluster_test_full_run_insert_codon/reads_2.fq
new file mode 100644
index 0000000..33460df
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_insert_codon/reads_2.fq
@@ -0,0 +1,592 @@
+ at presence_absence1:1:79:154/2
+GTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:2:904:979/2
+GATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:3:869:942/2
+GACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGGGACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:4:238:314/2
+AGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:5:202:276/2
+ACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:6:560:635/2
+TTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:7:321:394/2
+GTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:8:795:871/2
+GACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:9:489:563/2
+CGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:10:789:865/2
+GTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:11:292:367/2
+GGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:12:381:457/2
+GCTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:13:673:747/2
+ACCCAACAGATTGCGTACCTTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:14:163:239/2
+GTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATACCACCAGTTGTTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:15:69:143/2
+ATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:16:812:888/2
+GGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:17:822:895/2
+GACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:18:61:138/2
+CACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:19:969:1041/2
+TCTATTGTTAAGGTCGCCAGATCTTTAACAGAGGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:20:880:955/2
+GTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:21:941:1015/2
+AACAGAGGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:22:67:141/2
+TAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:23:470:546/2
+CTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:24:482:555/2
+AGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:25:714:790/2
+GCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:26:857:932/2
+GAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:27:312:386/2
+CGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:28:26:99/2
+GTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCTAGAGCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:29:307:381/2
+TTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:30:587:661/2
+TCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:31:734:808/2
+TCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:32:657:730/2
+CCTTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:33:584:657/2
+ATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:34:239:314/2
+AGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:35:912:987/2
+CCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:36:352:426/2
+CCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:37:622:695/2
+GTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:38:514:587/2
+CCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:39:1:76/2
+CGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCTAGAGCCAAGAGGATTAGTGAATCACTGATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:40:406:481/2
+TCTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGCACGCCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:41:13:88/2
+GACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCTAGAGCCAAGAGGATTAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:42:246:321/2
+GTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:43:107:181/2
+ATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:44:425:500/2
+GACTGGGCATACTATAAGCTCTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATATGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:45:255:329/2
+TGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:46:126:201/2
+CCGGCTGCTCACAAATGGGAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACACTATTATAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:47:458:532/2
+CCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAACTTGATCACCGTAGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:48:352:427/2
+GCCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:49:303:378/2
+CTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:50:640:715/2
+AGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:51:598:672/2
+CCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:52:865:938/2
+GATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGGGACTTTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:53:938:1012/2
+AGAGGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:54:237:311/2
+GTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:55:626:701/2
+AGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:56:487:561/2
+TTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:57:458:533/2
+TCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAACTTGATCACCGTAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:58:360:434/2
+TAATATGGCCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:59:892:966/2
+AGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:60:327:402/2
+CTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:61:655:728/2
+TTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:62:944:1020/2
+TCTTTAACAGAGGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:63:281:355/2
+CCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:64:876:950/2
+AGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:65:29:103/2
+TTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCTAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:66:439:514/2
+TTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:67:843:916/2
+GAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:68:48:121/2
+CATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:69:641:717/2
+GTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:70:448:524/2
+ATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAACTTGATCACCGTAGGTGCTGCGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:71:472:545/2
+TGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAACTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:72:298:373/2
+CAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:73:84:158/2
+TAGGGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:74:473:547/2
+CCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:75:115:190/2
+CAAATGGGAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:76:803:877/2
+GTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:77:241:316/2
+ATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:78:138:213/2
+TGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:79:845:918/2
+AGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:80:492:567/2
+CGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:81:908:983/2
+GTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:82:624:699/2
+CTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:83:235:310/2
+TGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:84:823:896/2
+TGACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:85:283:358/2
+TGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:86:255:331/2
+ACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:87:72:148/2
+GTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:88:615:688/2
+ATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:89:936:1009/2
+GGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:90:711:784/2
+TACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGTAGATAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:91:590:666/2
+CCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:92:747:820/2
+CGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:93:234:309/2
+GTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:94:325:399/2
+GGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:95:214:287/2
+ATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:96:155:230/2
+TAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATACCACCAGTTGTTTGGCCATCTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:97:351:427/2
+GCCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:98:734:809/2
+CTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:99:756:830/2
+CAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:100:266:341/2
+GAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:101:639:714/2
+GATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:102:278:353/2
+CTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:103:265:341/2
+GAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:104:394:468/2
+TCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGCACGCCATTCTGCTAGGCCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:105:918:993/2
+ACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:106:818:891/2
+ATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:107:745:818/2
+TGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:108:282:356/2
+ACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:109:942:1016/2
+TAACAGAGGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:110:562:637/2
+AATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:111:778:854/2
+CCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:112:445:519/2
+TGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:113:366:440/2
+TCAGTTTAATATGGCCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:114:413:488/2
+TATAAGCTCTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:115:564:638/2
+AAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:116:774:850/2
+CAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:117:101:175/2
+CCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:118:958:1033/2
+TAAGGTCGCCAGATCTTTAACAGAGGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:119:128:203/2
+AGCCGGCTGCTCACAAATGGGAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACACTATTATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:120:860:934/2
+GTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:121:672:747/2
+ACCCAACAGATTGCGTACCTTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:122:676:750/2
+GGTACCCAACAGATTGCGTACCTTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:123:188:262/2
+TTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:124:186:260/2
+ATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:125:647:722/2
+GTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:126:743:818/2
+TGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:127:477:552/2
+GTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:128:793:868/2
+TTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:129:33:106/2
+AGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:130:187:261/2
+TATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:131:681:756/2
+TTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:132:899:973/2
+GAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:133:472:547/2
+CCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:134:36:111/2
+GAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:135:617:693/2
+CTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:136:832:905/2
+AAGCAATATTGACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:137:168:242/2
+ATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATACCACCAGTTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:138:428:503/2
+ACAGACTGGGCATACTATAAGCTCTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:139:19:92/2
+GGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCTAGAGCCAAGAGGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:140:132:204/2
+AAGCCGGCTGCTCACAAATGGGAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACACTATTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:141:538:613/2
+GCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:142:690:763/2
+TCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:143:439:514.dup.2/2
+TTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:144:592:666/2
+CCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:145:445:519.dup.2/2
+TGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:146:530:605/2
+CAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:147:521:595/2
+GAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:148:178:250/2
+GGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATACCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/tests/data/cluster_test_full_run_insert_codon/references.fa b/ariba/tests/data/cluster_test_full_run_insert_codon/references.fa
new file mode 100644
index 0000000..be61945
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_insert_codon/references.fa
@@ -0,0 +1,3 @@
+>presence_absence1
+ATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACC
+TACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAA
diff --git a/ariba/tests/data/cluster_test_full_run_multiple_vars.fa b/ariba/tests/data/cluster_test_full_run_multiple_vars.fa
new file mode 100644
index 0000000..c7a01d8
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_multiple_vars.fa
@@ -0,0 +1,5 @@
+>presence_absence1
+ATGGATCGCGAAGCGATGACCCATGAAGCGACCGAACGCGCGAGCACCAACATTAGCCAT
+ATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAA
+>presence_absence2
+ATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTAA
diff --git a/ariba/tests/data/cluster_test_full_run_multiple_vars.tsv b/ariba/tests/data/cluster_test_full_run_multiple_vars.tsv
new file mode 100644
index 0000000..0e711a6
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_multiple_vars.tsv
@@ -0,0 +1,2 @@
+presence_absence1	1	0	.	.	Generic description of presence_absence1
+presence_absence2	1	0	.	.	Generic description of presence_absence2
diff --git a/ariba/tests/data/cluster_test_full_run_multiple_vars/for_reads.fa b/ariba/tests/data/cluster_test_full_run_multiple_vars/for_reads.fa
new file mode 100644
index 0000000..3d5115f
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_multiple_vars/for_reads.fa
@@ -0,0 +1,20 @@
+>presence_absence1
+GTCTAGGCCATTATCGATCGACAAGTTGCCGTAATTTGCGTAAAATAGCACGAACCCGAA
+ACTTCCTGATATGTCTTGAGTGAATCCATAACCGACCAATTCCCTGCACACGGGAAATCT
+TCACATATAGGACTTCCTTGGGATAAGATAATTGCTCCTATTGGGGTTACACGCCATTAC
+TTTGGGCGCCCCGTATACAGGGAATCACAGATCACCGTGCCTCCAGAGCCGCCCTGGGTT
+GCAGCTGGAACCAGCAAAGTAGGCCTATCACCGACCTCGATAGTCGTTTCCTACGTGTAG
+TTTTAACAGAGGCTTGTTCAATACAGTGGGGCCGGTCCAGGGGCATGTGTAATCCGGTGT
+CGTCACGAGGACAATGTTTGATGACTCTATGATACTCACCGTCAGTACTTCCTCCTCAAT
+GGTGACTGGTTGAAGTCCCTCGTGCCACCATGTTGAAAAGCGGTCGCTTAAAATAGTGAA
+ATGGATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCAT
+ATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAA
+CGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGTGGA
+GCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGA
+ATGACCAGGCGATTCATGGGGGGTCCGTATACTCTCCCGTCTGTTAGGAATTATCTTGCC
+TGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCA
+GTATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCGCCCTTAGACTTCATGATACTA
+CCATAAATGAGGTCTCGTTCCATGATATAGGGAGATGACGAATGCTTGCCAAGACAAATG
+AATTTTCTGGGATGAGGCAGCGGGTCCGGGAGTCACGCCAAACATTGAAAAACTCTCCGA
+CAACAGTTGAAAACTCACTTGTTCATCCCTGACCTTAGCCGACCACCGCTGGGTAATGAG
+CTCGGTACTGTCTGGCCTGGCACGCAAACAAATGGCTTAG
diff --git a/ariba/tests/data/cluster_test_full_run_multiple_vars/reads_1.fq b/ariba/tests/data/cluster_test_full_run_multiple_vars/reads_1.fq
new file mode 100644
index 0000000..52d79ad
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_multiple_vars/reads_1.fq
@@ -0,0 +1,584 @@
+ at presence_absence1:1:768:842/1
+CCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:2:395:471/1
+CTCACCGTCAGTACTTCCTCCTCAATGGTGACTGGTTGAAGTCCCTCGTGCCACCATGTTGAAAAGCGGTCGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:3:732:807/1
+CCCGTCTGTTAGGAATTATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:4:745:820/1
+AATTATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:5:245:320/1
+CTGGAACCAGCAAAGTAGGCCTATCACCGACCTCGATAGTCGTTTCCTACGTGTAGTTTTAACAGAGGCTTGTTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:6:280:354/1
+ATAGTCGTTTCCTACGTGTAGTTTTAACAGAGGCTTGTTCAATACAGTGGGGCCGGTCCAGGGGCATGTGTAATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:7:617:690/1
+CGGCTACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:8:646:721/1
+GGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGACCAGGCGATTCATGGGGGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:9:239:312/1
+TTGCAGCTGGAACCAGCAAAGTAGGCCTATCACCGACCTCGATAGTCGTTTCCTACGTGTAGTTTTAACAGAGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:10:770:844/1
+ACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:11:198:273/1
+CAGGGAATCACAGATCACCGTGCCTCCAGAGCCGCCCTGGGTTGCAGCTGGAACCAGCAAAGTAGGCCTATCACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:12:441:516/1
+CGTGCCACCATGTTGAAAAGCGGTCGCTTAAAATAGTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:13:650:724/1
+CCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGACCAGGCGATTCATGGGGGGTCCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:14:528:601/1
+CAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:15:840:915/1
+AGGAGACGCTTAGGCGCCCTTAGACTTCATGATACTACCATAAATGAGGTCTCGTTCCATGATATAGGGAGATGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:16:588:663/1
+ACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:17:536:610/1
+GCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:18:153:229/1
+TGCTCCTATTGGGGTTACACGCCATTACTTTGGGCGCCCCGTATACAGGGAATCACAGATCACCGTGCCTCCAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:19:427:501/1
+TGGTTGAAGTCCCTCGTGCCACCATGTTGAAAAGCGGTCGCTTAAAATAGTGAAATGGATCGCGAAGCGATGACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:20:472:547/1
+AATAGTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATATTAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:21:498:573/1
+GACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:22:760:836/1
+AGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:23:808:884/1
+CGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCGCCCTTAGACTTCATGATACTACCATAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:24:912:986/1
+TGACGAATGCTTGCCAAGACAAATGAATTTTCTGGGATGAGGCAGCGGGTCCGGGAGTCACGCCAAACATTGAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:25:151:225/1
+ATTGCTCCTATTGGGGTTACACGCCATTACTTTGGGCGCCCCGTATACAGGGAATCACAGATCACCGTGCCTCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:26:322:398/1
+TACAGTGGGGCCGGTCCAGGGGCATGTGTAATCCGGTGTCGTCACGAGGACAATGTTTGATGACTCTATGATACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:27:790:866/1
+ACTAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCGCCCTTAGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:28:642:715/1
+TAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGACCAGGCGATTCATGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:29:944:1018/1
+TGGGATGAGGCAGCGGGTCCGGGAGTCACGCCAAACATTGAAAAACTCTCCGACAACAGTTGAAAACTCACTTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:30:358:432/1
+TGTCGTCACGAGGACAATGTTTGATGACTCTATGATACTCACCGTCAGTACTTCCTCCTCAATGGTGACTGGTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:31:626:700/1
+CTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:32:741:814/1
+TAGGAATTATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:33:419:493/1
+ATGGTGACTGGTTGAAGTCCCTCGTGCCACCATGTTGAAAAGCGGTCGCTTAAAATAGTGAAATGGATCGCGAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:34:140:213/1
+GGGATAAGATAATTGCTCCTATTGGGGTTACACGCCATTACTTTGGGCGCCCCGTATACAGGGAATCACAGATCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:35:443:517/1
+TGCCACCATGTTGAAAAGCGGTCGCTTAAAATAGTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:36:83:158/1
+AATCCATAACCGACCAATTCCCTGCACACGGGAAATCTTCACATATAGGACTTCCTTGGGATAAGATAATTGCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:37:472:548/1
+AATAGTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATATTAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:38:512:586/1
+CCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:39:637:709/1
+GCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGACCAGGCGATTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:40:624:697/1
+TCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:41:454:528/1
+TGAAAAGCGGTCGCTTAAAATAGTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:42:417:492/1
+CAATGGTGACTGGTTGAAGTCCCTCGTGCCACCATGTTGAAAAGCGGTCGCTTAAAATAGTGAAATGGATCGCGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:43:928:1003/1
+AGACAAATGAATTTTCTGGGATGAGGCAGCGGGTCCGGGAGTCACGCCAAACATTGAAAAACTCTCCGACAACAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:44:214:287/1
+ACCGTGCCTCCAGAGCCGCCCTGGGTTGCAGCTGGAACCAGCAAAGTAGGCCTATCACCGACCTCGATAGTCGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:45:811:887/1
+GTACCAGTATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCGCCCTTAGACTTCATGATACTACCATAAATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:46:492:567/1
+AGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:47:875:950/1
+TACCATAAATGAGGTCTCGTTCCATGATATAGGGAGATGACGAATGCTTGCCAAGACAAATGAATTTTCTGGGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:48:913:987/1
+GACGAATGCTTGCCAAGACAAATGAATTTTCTGGGATGAGGCAGCGGGTCCGGGAGTCACGCCAAACATTGAAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:49:721:796/1
+CCGTATACTCTCCCGTCTGTTAGGAATTATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:50:90:163/1
+AACCGACCAATTCCCTGCACACGGGAAATCTTCACATATAGGACTTCCTTGGGATAAGATAATTGCTCCTATTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:51:113:187/1
+GGAAATCTTCACATATAGGACTTCCTTGGGATAAGATAATTGCTCCTATTGGGGTTACACGCCATTACTTTGGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:52:500:574/1
+CCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:53:512:584/1
+CCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:54:781:855/1
+AACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:55:792:867/1
+TAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCGCCCTTAGACTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:56:713:786/1
+TGGGGGGTCCGTATACTCTCCCGTCTGTTAGGAATTATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:57:600:676/1
+CGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:58:564:640/1
+AAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGTGGAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:59:355:429/1
+CGGTGTCGTCACGAGGACAATGTTTGATGACTCTATGATACTCACCGTCAGTACTTCCTCCTCAATGGTGACTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:60:382:458/1
+TGACTCTATGATACTCACCGTCAGTACTTCCTCCTCAATGGTGACTGGTTGAAGTCCCTCGTGCCACCATGTTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:61:658:731/1
+GTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGACCAGGCGATTCATGGGGGGTCCGTATACTCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:62:849:923/1
+TTAGGCGCCCTTAGACTTCATGATACTACCATAAATGAGGTCTCGTTCCATGATATAGGGAGATGACGAATGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:63:623:697/1
+CTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:64:659:735/1
+TTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGACCAGGCGATTCATGGGGGGTCCGTATACTCTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:65:909:984/1
+AGATGACGAATGCTTGCCAAGACAAATGAATTTTCTGGGATGAGGCAGCGGGTCCGGGAGTCACGCCAAACATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:66:498:571/1
+GACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:67:223:298/1
+CCAGAGCCGCCCTGGGTTGCAGCTGGAACCAGCAAAGTAGGCCTATCACCGACCTCGATAGTCGTTTCCTACGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:68:151:226/1
+ATTGCTCCTATTGGGGTTACACGCCATTACTTTGGGCGCCCCGTATACAGGGAATCACAGATCACCGTGCCTCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:69:164:238/1
+GGGTTACACGCCATTACTTTGGGCGCCCCGTATACAGGGAATCACAGATCACCGTGCCTCCAGAGCCGCCCTGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:70:665:740/1
+GTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGACCAGGCGATTCATGGGGGGTCCGTATACTCTCCCGTCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:71:769:842/1
+CACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:72:855:931/1
+GCCCTTAGACTTCATGATACTACCATAAATGAGGTCTCGTTCCATGATATAGGGAGATGACGAATGCTTGCCAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:73:621:694/1
+TACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:74:754:828/1
+GCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:75:542:616/1
+TTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:76:586:662/1
+GAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:77:463:536/1
+GTCGCTTAAAATAGTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:78:449:523/1
+CATGTTGAAAAGCGGTCGCTTAAAATAGTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:79:537:612/1
+CCATATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:80:728:802/1
+CTCTCCCGTCTGTTAGGAATTATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:81:550:624/1
+ATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:82:749:824/1
+ATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTATCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:83:560:635/1
+GGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:84:145:219/1
+AAGATAATTGCTCCTATTGGGGTTACACGCCATTACTTTGGGCGCCCCGTATACAGGGAATCACAGATCACCGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:85:512:588/1
+CCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:86:541:615/1
+ATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:87:422:498/1
+GTGACTGGTTGAAGTCCCTCGTGCCACCATGTTGAAAAGCGGTCGCTTAAAATAGTGAAATGGATCGCGAAGCGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:88:22:97/1
+CAAGTTGCCGTAATTTGCGTAAAATAGCACGAACCCGAAACTTCCTGATATGTCTTGAGTGAATCCATAACCGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:89:819:895/1
+ATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCGCCCTTAGACTTCATGATACTACCATAAATGAGGTCTCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:90:615:690/1
+CTCGGCTACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:91:559:633/1
+TGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:92:177:251/1
+TTACTTTGGGCGCCCCGTATACAGGGAATCACAGATCACCGTGCCTCCAGAGCCGCCCTGGGTTGCAGCTGGAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:93:285:359/1
+CGTTTCCTACGTGTAGTTTTAACAGAGGCTTGTTCAATACAGTGGGGCCGGTCCAGGGGCATGTGTAATCCGGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:94:393:470/1
+TACTCACCGTCAGTACTTCCTCCTCAATGGTGACTGGTTGAAGTCCCTCGTGCCACCATGTTGAAAAGCGGTCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:95:566:640/1
+GCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGTGGAGCAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:96:709:782/1
+TTCATGGGGGGTCCGTATACTCTCCCGTCTGTTAGGAATTATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:97:297:372/1
+GTAGTTTTAACAGAGGCTTGTTCAATACAGTGGGGCCGGTCCAGGGGCATGTGTAATCCGGTGTCGTCACGAGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:98:364:439/1
+CACGAGGACAATGTTTGATGACTCTATGATACTCACCGTCAGTACTTCCTCCTCAATGGTGACTGGTTGAAGTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:99:541:614/1
+ATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:100:196:270/1
+TACAGGGAATCACAGATCACCGTGCCTCCAGAGCCGCCCTGGGTTGCAGCTGGAACCAGCAAAGTAGGCCTATCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:101:750:823/1
+TCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTATCATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:102:192:268/1
+CGTATACAGGGAATCACAGATCACCGTGCCTCCAGAGCCGCCCTGGGTTGCAGCTGGAACCAGCAAAGTAGGCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:103:743:817/1
+GGAATTATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:104:302:376/1
+TTTAACAGAGGCTTGTTCAATACAGTGGGGCCGGTCCAGGGGCATGTGTAATCCGGTGTCGTCACGAGGACAATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:105:456:530/1
+AAAAGCGGTCGCTTAAAATAGTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:106:931:1004/1
+CAAATGAATTTTCTGGGATGAGGCAGCGGGTCCGGGAGTCACGCCAAACATTGAAAAACTCTCCGACAACAGTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:107:654:727/1
+GGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGACCAGGCGATTCATGGGGGGTCCGTATAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:108:260:334/1
+TAGGCCTATCACCGACCTCGATAGTCGTTTCCTACGTGTAGTTTTAACAGAGGCTTGTTCAATACAGTGGGGCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:109:468:542/1
+TTAAAATAGTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:110:558:633/1
+GTGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:111:865:938/1
+TTCATGATACTACCATAAATGAGGTCTCGTTCCATGATATAGGGAGATGACGAATGCTTGCCAAGACAAATGAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:112:75:149/1
+CTTGAGTGAATCCATAACCGACCAATTCCCTGCACACGGGAAATCTTCACATATAGGACTTCCTTGGGATAAGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:113:39:113/1
+CGTAAAATAGCACGAACCCGAAACTTCCTGATATGTCTTGAGTGAATCCATAACCGACCAATTCCCTGCACACGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:114:175:251/1
+CATTACTTTGGGCGCCCCGTATACAGGGAATCACAGATCACCGTGCCTCCAGAGCCGCCCTGGGTTGCAGCTGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:115:787:860/1
+TTTACTAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCGCCCTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:116:206:281/1
+CACAGATCACCGTGCCTCCAGAGCCGCCCTGGGTTGCAGCTGGAACCAGCAAAGTAGGCCTATCACCGACCTCGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:117:509:584/1
+GCACCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:118:487:564/1
+CGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:119:515:590/1
+AACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCGAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:120:593:669/1
+CCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:121:25:100/1
+GTTGCCGTAATTTGCGTAAAATAGCACGAACCCGAAACTTCCTGATATGTCTTGAGTGAATCCATAACCGACCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:122:111:186/1
+CGGGAAATCTTCACATATAGGACTTCCTTGGGATAAGATAATTGCTCCTATTGGGGTTACACGCCATTACTTTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:123:105:180/1
+TGCACACGGGAAATCTTCACATATAGGACTTCCTTGGGATAAGATAATTGCTCCTATTGGGGTTACACGCCATTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:124:870:945/1
+GATACTACCATAAATGAGGTCTCGTTCCATGATATAGGGAGATGACGAATGCTTGCCAAGACAAATGAATTTTCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:125:633:708/1
+TGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGACCAGGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:126:378:452/1
+TTGATGACTCTATGATACTCACCGTCAGTACTTCCTCCTCAATGGTGACTGGTTGAAGTCCCTCGTGCCACCATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:127:585:659/1
+CGAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:128:25:99/1
+GTTGCCGTAATTTGCGTAAAATAGCACGAACCCGAAACTTCCTGATATGTCTTGAGTGAATCCATAACCGACCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:129:714:791/1
+GGGGGGTCCGTATACTCTCCCGTCTGTTAGGAATTATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:130:554:628/1
+GCGCGTGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:131:65:140/1
+CCTGATATGTCTTGAGTGAATCCATAACCGACCAATTCCCTGCACACGGGAAATCTTCACATATAGGACTTCCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:132:849:925/1
+TTAGGCGCCCTTAGACTTCATGATACTACCATAAATGAGGTCTCGTTCCATGATATAGGGAGATGACGAATGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:133:894:970/1
+TTCCATGATATAGGGAGATGACGAATGCTTGCCAAGACAAATGAATTTTCTGGGATGAGGCAGCGGGTCCGGGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:134:357:431/1
+GTGTCGTCACGAGGACAATGTTTGATGACTCTATGATACTCACCGTCAGTACTTCCTCCTCAATGGTGACTGGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:135:19:93/1
+CGACAAGTTGCCGTAATTTGCGTAAAATAGCACGAACCCGAAACTTCCTGATATGTCTTGAGTGAATCCATAACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:136:616:691/1
+TCGGCTACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:137:622:694/1
+ACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:138:484:557/1
+GATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:139:874:949/1
+CTACCATAAATGAGGTCTCGTTCCATGATATAGGGAGATGACGAATGCTTGCCAAGACAAATGAATTTTCTGGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:140:346:421/1
+TGTGTAATCCGGTGTCGTCACGAGGACAATGTTTGATGACTCTATGATACTCACCGTCAGTACTTCCTCCTCAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:141:931:1005/1
+CAAATGAATTTTCTGGGATGAGGCAGCGGGTCCGGGAGTCACGCCAAACATTGAAAAACTCTCCGACAACAGTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:142:795:870/1
+ATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCGCCCTTAGACTTCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:143:476:552/1
+GTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:144:776:850/1
+TGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGAGACGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:145:805:879/1
+AAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCGCCCTTAGACTTCATGATACTACCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:146:332:407/1
+CCGGTCCAGGGGCATGTGTAATCCGGTGTCGTCACGAGGACAATGTTTGATGACTCTATGATACTCACCGTCAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/tests/data/cluster_test_full_run_multiple_vars/reads_2.fq b/ariba/tests/data/cluster_test_full_run_multiple_vars/reads_2.fq
new file mode 100644
index 0000000..0b3734f
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_multiple_vars/reads_2.fq
@@ -0,0 +1,584 @@
+ at presence_absence1:1:768:842/2
+CGTCATCTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:2:395:471/2
+TTAATATGGCTAATGTTGGTGCTCGCGCGTTCGGTGCTATGATGGGTCATCGCTTCGCGATCCATTTCACTATTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:3:732:807/2
+TATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAATGATACTGGTACCCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:4:745:820/2
+ACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAATGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:5:245:320/2
+TATCATAGAGTCATCAAACATTGTCCTCGTGACGACACCGGATTACACATGCCCCTGGACCGGCCCCACTGTATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:6:280:354/2
+CAGTCACCATTGAGGAGGAAGTACTGACGGTGAGTATCATAGAGTCATCAAACATTGTCCTCGTGACGACACCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:7:617:690/2
+TAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCAGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:8:646:721/2
+TCTAGTAAAGGGGTTGGGCATCCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:9:239:312/2
+AGTCATCAAACATTGTCCTCGTGACGACACCGGATTACACATGCCCCTGGACCGGCCCCACTGTATTGAACAAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:10:770:844/2
+TTCGTCATCTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:11:198:273/2
+CATGCCCCTGGACCGGCCCCACTGTATTGAACAAGCCTCTGTTAAAACTACACGTAGGAAACGACTATCGAGGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:12:441:516/2
+TGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTAATGCCGTTAATATGGCTAATGTTGGTGCTCGCGCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:13:650:724/2
+ACATCTAGTAAAGGGGTTGGGCATCCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:14:528:601/2
+CATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:15:840:915/2
+GTTTTTCAATGTTTGGCGTGACTCCCGGACCCGCTGCCTCATCCCAGAAAATTCATTTGTCTTGGCAAGCATTCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:16:588:663/2
+GACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:17:536:610/2
+GGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:18:153:229/2
+AAACTACACGTAGGAAACGACTATCGAGGTCGGTGATAGGCCTACTTTGCTGGTTCCAGCTGCAACCCAGGGCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:19:427:501/2
+TATTCCATGCTTTCCCACGCGCTAATGCCGTTAATATGGCTAATGTTGGTGCTCGCGCGTTCGGTGCTATGATGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:20:472:547/2
+AGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTAATGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:21:498:573/2
+CCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:22:760:836/2
+CTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:23:808:884/2
+CGCTGCCTCATCCCAGAAAATTCATTTGTCTTGGCAAGCATTCGTCATCTCCCTATATCATGGAACGAGACCTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:24:912:986/2
+CGAGCTCATTACCCAGCGGTGGTCGGCTAAGGTCAGGGATGAACAAGTGAGTTTTCAACTGTTGTCGGAGAGTTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:25:151:225/2
+TACACGTAGGAAACGACTATCGAGGTCGGTGATAGGCCTACTTTGCTGGTTCCAGCTGCAACCCAGGGCGGCTCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:26:322:398/2
+TTTAAGCGACCGCTTTTCAACATGGTGGCACGAGGGACTTCAACCAGTCACCATTGAGGAGGAAGTACTGACGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:27:790:866/2
+AATTCATTTGTCTTGGCAAGCATTCGTCATCTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATCATGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:28:642:715/2
+AAAGGGGTTGGGCATCCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:29:944:1018/2
+GCCATTTGTTTGCGTGCCAGGCCAGACAGTACCGAGCTCATTACCCAGCGGTGGTCGGCTAAGGTCAGGGATGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:30:358:432/2
+TGATGGGTCATCGCTTCGCGATCCATTTCACTATTTTAAGCGACCGCTTTTCAACATGGTGGCACGAGGGACTTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:31:626:700/2
+CCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:32:741:814/2
+CCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAATGATACTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:33:419:493/2
+GCTTTCCCACGCGCTAATGCCGTTAATATGGCTAATGTTGGTGCTCGCGCGTTCGGTGCTATGATGGGTCATCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:34:140:213/2
+ACGACTATCGAGGTCGGTGATAGGCCTACTTTGCTGGTTCCAGCTGCAACCCAGGGCGGCTCTGGAGGCACGGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:35:443:517/2
+ATGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTAATGCCGTTAATATGGCTAATGTTGGTGCTCGCGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:36:83:158/2
+GCGGCTCTGGAGGCACGGTGATCTGTGATTCCCTGTATACGGGGCGCCCAAAGTAATGGCGTGTAACCCCAATAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:37:472:548/2
+TAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTAATGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:38:512:586/2
+AACACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:39:637:709/2
+GTTGGGCATCCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:40:624:697/2
+GTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:41:454:528/2
+ACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTAATGCCGTTAATATGGCTAATGTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:42:417:492/2
+CTTTCCCACGCGCTAATGCCGTTAATATGGCTAATGTTGGTGCTCGCGCGTTCGGTGCTATGATGGGTCATCGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:43:928:1003/2
+GCCAGGCCAGACAGTACCGAGCTCATTACCCAGCGGTGGTCGGCTAAGGTCAGGGATGAACAAGTGAGTTTTCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:44:214:287/2
+GACACCGGATTACACATGCCCCTGGACCGGCCCCACTGTATTGAACAAGCCTCTGTTAAAACTACACGTAGGAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:45:811:887/2
+ACCCGCTGCCTCATCCCAGAAAATTCATTTGTCTTGGCAAGCATTCGTCATCTCCCTATATCATGGAACGAGACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:46:492:567/2
+CGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:47:875:950/2
+GGATGAACAAGTGAGTTTTCAACTGTTGTCGGAGAGTTTTTCAATGTTTGGCGTGACTCCCGGACCCGCTGCCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:48:913:987/2
+CCGAGCTCATTACCCAGCGGTGGTCGGCTAAGGTCAGGGATGAACAAGTGAGTTTTCAACTGTTGTCGGAGAGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:49:721:796/2
+CATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAATGATACTGGTACCCGCTTAATCTAACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:50:90:163/2
+CCAGGGCGGCTCTGGAGGCACGGTGATCTGTGATTCCCTGTATACGGGGCGCCCAAAGTAATGGCGTGTAACCCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:51:113:187/2
+TACTTTGCTGGTTCCAGCTGCAACCCAGGGCGGCTCTGGAGGCACGGTGATCTGTGATTCCCTGTATACGGGGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:52:500:574/2
+ACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:53:512:584/2
+CACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:54:781:855/2
+CTTGGCAAGCATTCGTCATCTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:55:792:867/2
+AAATTCATTTGTCTTGGCAAGCATTCGTCATCTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATCATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:56:713:786/2
+AAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAATGATACTGGTACCCGCTTAATCTAACATCTAGTAAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:57:600:676/2
+ATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCAGCTTAGATAGGCGAGGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:58:564:640/2
+CATGAATCGCCTGGTCATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:59:355:429/2
+TGGGTCATCGCTTCGCGATCCATTTCACTATTTTAAGCGACCGCTTTTCAACATGGTGGCACGAGGGACTTCAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:60:382:458/2
+TGTTGGTGCTCGCGCGTTCGGTGCTATGATGGGTCATCGCTTCGCGATCCATTTCACTATTTTAAGCGACCGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:61:658:731/2
+TAATCTAACATCTAGTAAAGGGGTTGGGCATCCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:62:849:923/2
+GTCGGAGAGTTTTTCAATGTTTGGCGTGACTCCCGGACCCGCTGCCTCATCCCAGAAAATTCATTTGTCTTGGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:63:623:697/2
+GTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:64:659:735/2
+CGCTTAATCTAACATCTAGTAAAGGGGTTGGGCATCCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:65:909:984/2
+AGCTCATTACCCAGCGGTGGTCGGCTAAGGTCAGGGATGAACAAGTGAGTTTTCAACTGTTGTCGGAGAGTTTTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:66:498:571/2
+ATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTATTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:67:223:298/2
+GTCCTCGTGACGACACCGGATTACACATGCCCCTGGACCGGCCCCACTGTATTGAACAAGCCTCTGTTAAAACTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:68:151:226/2
+CTACACGTAGGAAACGACTATCGAGGTCGGTGATAGGCCTACTTTGCTGGTTCCAGCTGCAACCCAGGGCGGCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:69:164:238/2
+CCTCTGTTAAAACTACACGTAGGAAACGACTATCGAGGTCGGTGATAGGCCTACTTTGCTGGTTCCAGCTGCAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:70:665:740/2
+GTACCCGCTTAATCTAACATCTAGTAAAGGGGTTGGGCATCCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:71:769:842/2
+CGTCATCTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:72:855:931/2
+CAACTGTTGTCGGAGAGTTTTTCAATGTTTGGCGTGACTCCCGGACCCGCTGCCTCATCCCAGAAAATTCATTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:73:621:694/2
+GCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:74:754:828/2
+ATCATGGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:75:542:616/2
+TAGATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:76:586:662/2
+ACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:77:463:536/2
+AAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTAATGCCGTTAATATGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:78:449:523/2
+GTCGGGATGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTAATGCCGTTAATATGGCTAATGTTGGTGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:79:537:612/2
+TAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:80:728:802/2
+TAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAATGATACTGGTACCCGCTTAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:81:550:624/2
+ATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCACAGCCAGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:82:749:824/2
+TGGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:83:560:635/2
+ATCGCCTGGTCATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:84:145:219/2
+TAGGAAACGACTATCGAGGTCGGTGATAGGCCTACTTTGCTGGTTCCAGCTGCAACCCAGGGCGGCTCTGGAGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:85:512:588/2
+GTAACACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:86:541:615/2
+AGATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:87:422:498/2
+TCCATGCTTTCCCACGCGCTAATGCCGTTAATATGGCTAATGTTGGTGCTCGCGCGTTCGGTGCTATGATGGGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:88:22:97/2
+TGTAACCCCAATAGGAGCAATTATCTTATCCCAAGGAAGTCCTATATGTGAAGATTTCCCGTGTGCAGGGAATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:89:819:895/2
+ACTCCCGGACCCGCTGCCTCATCCCAGAAAATTCATTTGTCTTGGCAAGCATTCGTCATCTCCCTATATCATGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:90:615:690/2
+TAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCAGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:91:559:633/2
+CGCCTGGTCATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:92:177:251/2
+TGTATTGAACAAGCCTCTGTTAAAACTACACGTAGGAAACGACTATCGAGGTCGGTGATAGGCCTACTTTGCTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:93:285:359/2
+TCAACCAGTCACCATTGAGGAGGAAGTACTGACGGTGAGTATCATAGAGTCATCAAACATTGTCCTCGTGACGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:94:393:470/2
+TAATATGGCTAATGTTGGTGCTCGCGCGTTCGGTGCTATGATGGGTCATCGCTTCGCGATCCATTTCACTATTTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:95:566:640/2
+CATGAATCGCCTGGTCATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:96:709:782/2
+GCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAATGATACTGGTACCCGCTTAATCTAACATCTAGTAAAGGGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:97:297:372/2
+GGCACGAGGGACTTCAACCAGTCACCATTGAGGAGGAAGTACTGACGGTGAGTATCATAGAGTCATCAAACATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:98:364:439/2
+GGTGCTATGATGGGTCATCGCTTCGCGATCCATTTCACTATTTTAAGCGACCGCTTTTCAACATGGTGGCACGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:99:541:614/2
+GATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:100:196:270/2
+GCCCCTGGACCGGCCCCACTGTATTGAACAAGCCTCTGTTAAAACTACACGTAGGAAACGACTATCGAGGTCGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:101:750:823/2
+GGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:102:192:268/2
+CCCTGGACCGGCCCCACTGTATTGAACAAGCCTCTGTTAAAACTACACGTAGGAAACGACTATCGAGGTCGGTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:103:743:817/2
+AGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAATGATAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:104:302:376/2
+TGGTGGCACGAGGGACTTCAACCAGTCACCATTGAGGAGGAAGTACTGACGGTGAGTATCATAGAGTCATCAAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:105:456:530/2
+CCACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTAATGCCGTTAATATGGCTAATGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:106:931:1004/2
+TGCCAGGCCAGACAGTACCGAGCTCATTACCCAGCGGTGGTCGGCTAAGGTCAGGGATGAACAAGTGAGTTTTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:107:654:727/2
+CTAACATCTAGTAAAGGGGTTGGGCATCCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:108:260:334/2
+GTACTGACGGTGAGTATCATAGAGTCATCAAACATTGTCCTCGTGACGACACCGGATTACACATGCCCCTGGACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:109:468:542/2
+AGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTAATGCCGTTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:110:558:633/2
+CGCCTGGTCATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:111:865:938/2
+GAGTTTTCAACTGTTGTCGGAGAGTTTTTCAATGTTTGGCGTGACTCCCGGACCCGCTGCCTCATCCCAGAAAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:112:75:149/2
+GAGGCACGGTGATCTGTGATTCCCTGTATACGGGGCGCCCAAAGTAATGGCGTGTAACCCCAATAGGAGCAATTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:113:39:113/2
+GCCCAAAGTAATGGCGTGTAACCCCAATAGGAGCAATTATCTTATCCCAAGGAAGTCCTATATGTGAAGATTTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:114:175:251/2
+TGTATTGAACAAGCCTCTGTTAAAACTACACGTAGGAAACGACTATCGAGGTCGGTGATAGGCCTACTTTGCTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:115:787:860/2
+TTTGTCTTGGCAAGCATTCGTCATCTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:116:206:281/2
+GGATTACACATGCCCCTGGACCGGCCCCACTGTATTGAACAAGCCTCTGTTAAAACTACACGTAGGAAACGACTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:117:509:584/2
+CACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:118:487:564/2
+GCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:119:515:590/2
+TTGTAACACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:120:593:669/2
+CTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCAGCTTAGATAGGCGAGGACCATCCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:121:25:100/2
+GCGTGTAACCCCAATAGGAGCAATTATCTTATCCCAAGGAAGTCCTATATGTGAAGATTTCCCGTGTGCAGGGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:122:111:186/2
+ACTTTGCTGGTTCCAGCTGCAACCCAGGGCGGCTCTGGAGGCACGGTGATCTGTGATTCCCTGTATACGGGGCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:123:105:180/2
+CTGGTTCCAGCTGCAACCCAGGGCGGCTCTGGAGGCACGGTGATCTGTGATTCCCTGTATACGGGGCGCCCAAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:124:870:945/2
+AACAAGTGAGTTTTCAACTGTTGTCGGAGAGTTTTTCAATGTTTGGCGTGACTCCCGGACCCGCTGCCTCATCCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:125:633:708/2
+TTGGGCATCCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:126:378:452/2
+TGCTCGCGCGTTCGGTGCTATGATGGGTCATCGCTTCGCGATCCATTTCACTATTTTAAGCGACCGCTTTTCAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:127:585:659/2
+GGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTTGTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:128:25:99/2
+CGTGTAACCCCAATAGGAGCAATTATCTTATCCCAAGGAAGTCCTATATGTGAAGATTTCCCGTGTGCAGGGAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:129:714:791/2
+AGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAATGATACTGGTACCCGCTTAATCTAACATCTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:130:554:628/2
+GGTCATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCACAGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:131:65:140/2
+TGATCTGTGATTCCCTGTATACGGGGCGCCCAAAGTAATGGCGTGTAACCCCAATAGGAGCAATTATCTTATCCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:132:849:925/2
+TTGTCGGAGAGTTTTTCAATGTTTGGCGTGACTCCCGGACCCGCTGCCTCATCCCAGAAAATTCATTTGTCTTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:133:894:970/2
+CGGTGGTCGGCTAAGGTCAGGGATGAACAAGTGAGTTTTCAACTGTTGTCGGAGAGTTTTTCAATGTTTGGCGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:134:357:431/2
+GATGGGTCATCGCTTCGCGATCCATTTCACTATTTTAAGCGACCGCTTTTCAACATGGTGGCACGAGGGACTTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:135:19:93/2
+ACCCCAATAGGAGCAATTATCTTATCCCAAGGAAGTCCTATATGTGAAGATTTCCCGTGTGCAGGGAATTGGTCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:136:616:691/2
+TTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCAGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:137:622:694/2
+GCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:138:484:557/2
+AGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATGCTTTCCCACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:139:874:949/2
+GATGAACAAGTGAGTTTTCAACTGTTGTCGGAGAGTTTTTCAATGTTTGGCGTGACTCCCGGACCCGCTGCCTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:140:346:421/2
+CGCTTCGCGATCCATTTCACTATTTTAAGCGACCGCTTTTCAACATGGTGGCACGAGGGACTTCAACCAGTCACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:141:931:1005/2
+GTGCCAGGCCAGACAGTACCGAGCTCATTACCCAGCGGTGGTCGGCTAAGGTCAGGGATGAACAAGTGAGTTTTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:142:795:870/2
+AGAAAATTCATTTGTCTTGGCAAGCATTCGTCATCTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:143:476:552/2
+GGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:144:776:850/2
+CAAGCATTCGTCATCTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:145:805:879/2
+CCTCATCCCAGAAAATTCATTTGTCTTGGCAAGCATTCGTCATCTCCCTATATCATGGAACGAGACCTCATTTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:146:332:407/2
+TTTCACTATTTTAAGCGACCGCTTTTCAACATGGTGGCACGAGGGACTTCAACCAGTCACCATTGAGGAGGAAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/tests/data/cluster_test_full_run_multiple_vars/references.fa b/ariba/tests/data/cluster_test_full_run_multiple_vars/references.fa
new file mode 100644
index 0000000..c7a01d8
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_multiple_vars/references.fa
@@ -0,0 +1,5 @@
+>presence_absence1
+ATGGATCGCGAAGCGATGACCCATGAAGCGACCGAACGCGCGAGCACCAACATTAGCCAT
+ATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAA
+>presence_absence2
+ATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTAA
diff --git a/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa b/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa
index 3278f9e..2a9253d 100644
--- a/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa
+++ b/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa
@@ -1,6 +1,6 @@
 >noncoding1
 CGTACGCGGGTGGAGACATGTACTCCACTCCCATACATCCCTAAGTTTGTCCCTAAGGCA
-GTGCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTAGGGAACGCCATCCGAGATCAC
+GTGCCCGCCGCCCACGAACGAATGCGGTGAGATGCTTAGGGAACGCCTATCCGAGATCAC
 >noncoding2
 TCTTTAACTGTTCACGACTGTATCGCGGCTTGCAAATCTTAAGTTCTTCCCAAGCGCGCT
 GCGATACAAATCCCAAGTTTAGCGGACAGTTCACGCCGGGTTCTAAGAATGTATGCGTCC
diff --git a/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.in.fa
similarity index 77%
copy from ariba/tests/data/cluster_test_full_run_ok_non_coding.fa
copy to ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.in.fa
index 3278f9e..2a9253d 100644
--- a/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa
+++ b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.in.fa
@@ -1,6 +1,6 @@
 >noncoding1
 CGTACGCGGGTGGAGACATGTACTCCACTCCCATACATCCCTAAGTTTGTCCCTAAGGCA
-GTGCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTAGGGAACGCCATCCGAGATCAC
+GTGCCCGCCGCCCACGAACGAATGCGGTGAGATGCTTAGGGAACGCCTATCCGAGATCAC
 >noncoding2
 TCTTTAACTGTTCACGACTGTATCGCGGCTTGCAAATCTTAAGTTCTTCCCAAGCGCGCT
 GCGATACAAATCCCAAGTTTAGCGGACAGTTCACGCCGGGTTCTAAGAATGTATGCGTCC
diff --git a/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.in.tsv b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.in.tsv
new file mode 100644
index 0000000..ab4e2bd
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.in.tsv
@@ -0,0 +1,6 @@
+noncoding1	0	0	.	.	generic description of noncoding1
+noncoding1	0	0	A6G	.	variant in ref and reads so should report
+noncoding1	0	0	G9T	.	wild type in ref and reads
+noncoding1	0	0	A14T	.	ref has wild type, reads has variant so should report
+noncoding1	0	0	A40C	.	ref has variant, reads has wild type
+noncoding2	0	0	.	.	generic description of noncoding2
diff --git a/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.mash.fa
similarity index 50%
copy from ariba/tests/data/cluster_test_full_run_ok_non_coding.fa
copy to ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.mash.fa
index 3278f9e..efcabae 100644
--- a/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa
+++ b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.mash.fa
@@ -1,6 +1,9 @@
->noncoding1
+>noncoding1_closest
 CGTACGCGGGTGGAGACATGTACTCCACTCCCATACATCCCTAAGTTTGTCCCTAAGGCA
-GTGCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTAGGGAACGCCATCCGAGATCAC
+GTGCCCGCCGCCCACGAACGAATGCGGTGAGATGCTTAGGGAACGCCTATCCGAGATCAC
+>noncoding1
+CGTACGCGGGTAGAGACATGTACTCCACTCACATACATCCCTAAGTTTGTCCCTAAGGCA
+GTGCCCGCCGCCCACGAACGAATGCGCTGAGATGCTTAGTGAACGCCTATCCGAGATCAC
 >noncoding2
 TCTTTAACTGTTCACGACTGTATCGCGGCTTGCAAATCTTAAGTTCTTCCCAAGCGCGCT
 GCGATACAAATCCCAAGTTTAGCGGACAGTTCACGCCGGGTTCTAAGAATGTATGCGTCC
diff --git a/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.mash.fa.msh b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.mash.fa.msh
new file mode 100644
index 0000000..37f5b75
Binary files /dev/null and b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.mash.fa.msh differ
diff --git a/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster/reads_1.fq b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster/reads_1.fq
new file mode 100644
index 0000000..4356e47
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster/reads_1.fq
@@ -0,0 +1,144 @@
+ at noncoding1:1:77:136/1
+CATGTACTCCACTCCCATACATCACTAAGTTTGTCCCTAAGGCATTGCCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:2:48:107/1
+CTGAGTGAAGCGACGTACGCGGGTGGTGACATGTACTCCACTCCCATACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:3:98:159/1
+TCACTAAGTTTGTCCCTAAGGCATTGCCCGCCGCCCACGAACGAACTGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:4:126:185/1
+CGCCGCCCACGAACGAACTGCGGTGAGATGCTTAGGGAACGCCATCCGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:5:26:85/1
+CGTAGCGTACTGAGTCTACTGACTGAGTGAAGCGACGTACGCGGGTGGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:6:85:145/1
+CCACTCCCATACATCACTAAGTTTGTCCCTAAGGCATTGCCCGCCGCCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:7:53:112/1
+TGAAGCGACGTACGCGGGTGGTGACATGTACTCCACTCCCATACATCACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:8:110:170/1
+TCCCTAAGGCATTGCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:9:73:132/1
+GTGACATGTACTCCACTCCCATACATCACTAAGTTTGTCCCTAAGGCATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:10:51:110/1
+AGTGAAGCGACGTACGCGGGTGGTGACATGTACTCCACTCCCATACATCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:11:123:183/1
+GCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTAGGGAACGCCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:12:63:122/1
+TACGCGGGTGGTGACATGTACTCCACTCCCATACATCACTAAGTTTGTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:13:91:150/1
+CCATACATCACTAAGTTTGTCCCTAAGGCATTGCCCGCCGCCCACGAACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:14:7:68/1
+GACTTGACGATCGTACGTACGTAGCGTACTGAGTCTACTGACTGAGTGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:15:104:163/1
+AGTTTGTCCCTAAGGCATTGCCCGCCGCCCACGAACGAACTGCGGTGAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:16:1:60/1
+CGTATCGACTTGACGATCGTACGTACGTAGCGTACTGAGTCTACTGACTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:17:64:123/1
+ACGCGGGTGGTGACATGTACTCCACTCCCATACATCACTAAGTTTGTCCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:18:128:185/1
+CCGCCCACGAACGAACTGCGGTGAGATGCTTAGGGAACGCCATCCGAGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:19:28:88/1
+TAGCGTACTGAGTCTACTGACTGAGTGAAGCGACGTACGCGGGTGGTGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:20:97:157/1
+ATCACTAAGTTTGTCCCTAAGGCATTGCCCGCCGCCCACGAACGAACTGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:21:22:81/1
+CGTACGTAGCGTACTGAGTCTACTGACTGAGTGAAGCGACGTACGCGGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:22:95:155/1
+ACATCACTAAGTTTGTCCCTAAGGCATTGCCCGCCGCCCACGAACGAACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:23:119:176/1
+CATTGCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTAGGGAACGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:24:110:169/1
+TCCCTAAGGCATTGCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:25:110:170.dup.2/1
+TCCCTAAGGCATTGCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:26:57:117/1
+GCGACGTACGCGGGTGGTGACATGTACTCCACTCCCATACATCACTAAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:27:41:100/1
+CTACTGACTGAGTGAAGCGACGTACGCGGGTGGTGACATGTACTCCACTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:28:18:78/1
+CGTACGTACGTAGCGTACTGAGTCTACTGACTGAGTGAAGCGACGTACGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:29:6:65/1
+CGACTTGACGATCGTACGTACGTAGCGTACTGAGTCTACTGACTGAGTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:30:3:63/1
+TATCGACTTGACGATCGTACGTACGTAGCGTACTGAGTCTACTGACTGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:31:66:124/1
+GCGGGTGGTGACATGTACTCCACTCCCATACATCACTAAGTTTGTCCCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:32:62:122/1
+GTACGCGGGTGGTGACATGTACTCCACTCCCATACATCACTAAGTTTGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:33:32:91/1
+GTACTGAGTCTACTGACTGAGTGAAGCGACGTACGCGGGTGGTGACATGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:34:28:86/1
+TAGCGTACTGAGTCTACTGACTGAGTGAAGCGACGTACGCGGGTGGTGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:35:3:64/1
+TATCGACTTGACGATCGTACGTACGTAGCGTACTGAGTCTACTGACTGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:36:120:181/1
+ATTGCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTAGGGAACGCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster/reads_2.fq b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster/reads_2.fq
new file mode 100644
index 0000000..b3ba738
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster/reads_2.fq
@@ -0,0 +1,144 @@
+ at noncoding1:1:77:136/2
+TGAATGTGATCTCGGATGGCGTTCCCTAAGCATCTCACCGCAGTTCGTTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:2:48:107/2
+GCATCTCACCGCAGTTCGTTCGTGGGCGGCGGGCAATGCCTTAGGGACAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:3:98:159/2
+CGTCGCTGATAGCTGCTCGCTCGTGAATGTGATCTCGGATGGCGTTCCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:4:126:185/2
+AGATCCGCGCGAGAGTATATATCGCTCGTCGCTGATAGCTGCTCGCTCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:5:26:85/2
+TGGGCGGCGGGCAATGCCTTAGGGACAAACTTAGTGATGTATGGGAGTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:6:85:145/2
+GCTCGCTCGTGAATGTGATCTCGGATGGCGTTCCCTAAGCATCTCACCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:7:53:112/2
+CCTAAGCATCTCACCGCAGTTCGTTCGTGGGCGGCGGGCAATGCCTTAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:8:110:170/2
+TATATATCGCTCGTCGCTGATAGCTGCTCGCTCGTGAATGTGATCTCGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:9:73:132/2
+TGTGATCTCGGATGGCGTTCCCTAAGCATCTCACCGCAGTTCGTTCGTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:10:51:110/2
+TAAGCATCTCACCGCAGTTCGTTCGTGGGCGGCGGGCAATGCCTTAGGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:11:123:183/2
+ATCCGCGCGAGAGTATATATCGCTCGTCGCTGATAGCTGCTCGCTCGTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:12:63:122/2
+GATGGCGTTCCCTAAGCATCTCACCGCAGTTCGTTCGTGGGCGGCGGGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:13:91:150/2
+TAGCTGCTCGCTCGTGAATGTGATCTCGGATGGCGTTCCCTAAGCATCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:14:7:68/2
+CTTAGGGACAAACTTAGTGATGTATGGGAGTGGAGTACATGTCACCACCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:15:104:163/2
+CGCTCGTCGCTGATAGCTGCTCGCTCGTGAATGTGATCTCGGATGGCGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:16:1:60/2
+CAAACTTAGTGATGTATGGGAGTGGAGTACATGTCACCACCCGCGTACGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:17:64:123/2
+GGATGGCGTTCCCTAAGCATCTCACCGCAGTTCGTTCGTGGGCGGCGGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:18:128:185/2
+AGATCCGCGCGAGAGTATATATCGCTCGTCGCTGATAGCTGCTCGCTCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:19:28:88/2
+TCGTGGGCGGCGGGCAATGCCTTAGGGACAAACTTAGTGATGTATGGGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:20:97:157/2
+TCGCTGATAGCTGCTCGCTCGTGAATGTGATCTCGGATGGCGTTCCCTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:21:22:81/2
+CGGCGGGCAATGCCTTAGGGACAAACTTAGTGATGTATGGGAGTGGAGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:22:95:155/2
+GCTGATAGCTGCTCGCTCGTGAATGTGATCTCGGATGGCGTTCCCTAAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:23:119:176/2
+CGAGAGTATATATCGCTCGTCGCTGATAGCTGCTCGCTCGTGAATGTGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:24:110:169/2
+ATATATCGCTCGTCGCTGATAGCTGCTCGCTCGTGAATGTGATCTCGGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:25:110:170.dup.2/2
+TATATATCGCTCGTCGCTGATAGCTGCTCGCTCGTGAATGTGATCTCGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:26:57:117/2
+CGTTCCCTAAGCATCTCACCGCAGTTCGTTCGTGGGCGGCGGGCAATGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:27:41:100/2
+ACCGCAGTTCGTTCGTGGGCGGCGGGCAATGCCTTAGGGACAAACTTAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:28:18:78/2
+CGGGCAATGCCTTAGGGACAAACTTAGTGATGTATGGGAGTGGAGTACAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:29:6:65/2
+AGGGACAAACTTAGTGATGTATGGGAGTGGAGTACATGTCACCACCCGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:30:3:63/2
+GGACAAACTTAGTGATGTATGGGAGTGGAGTACATGTCACCACCCGCGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:31:66:124/2
+CGGATGGCGTTCCCTAAGCATCTCACCGCAGTTCGTTCGTGGGCGGCGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:32:62:122/2
+GATGGCGTTCCCTAAGCATCTCACCGCAGTTCGTTCGTGGGCGGCGGGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:33:32:91/2
+CGTTCGTGGGCGGCGGGCAATGCCTTAGGGACAAACTTAGTGATGTATGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:34:28:86/2
+GTGGGCGGCGGGCAATGCCTTAGGGACAAACTTAGTGATGTATGGGAGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:35:3:64/2
+GGGACAAACTTAGTGATGTATGGGAGTGGAGTACATGTCACCACCCGCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:36:120:181/2
+CCGCGCGAGAGTATATATCGCTCGTCGCTGATAGCTGCTCGCTCGTGAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster/references.fa
similarity index 77%
copy from ariba/tests/data/cluster_test_full_run_ok_non_coding.fa
copy to ariba/tests/data/cluster_test_full_run_ref_not_in_cluster/references.fa
index 3278f9e..2a9253d 100644
--- a/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa
+++ b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster/references.fa
@@ -1,6 +1,6 @@
 >noncoding1
 CGTACGCGGGTGGAGACATGTACTCCACTCCCATACATCCCTAAGTTTGTCCCTAAGGCA
-GTGCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTAGGGAACGCCATCCGAGATCAC
+GTGCCCGCCGCCCACGAACGAATGCGGTGAGATGCTTAGGGAACGCCTATCCGAGATCAC
 >noncoding2
 TCTTTAACTGTTCACGACTGTATCGCGGCTTGCAAATCTTAAGTTCTTCCCAAGCGCGCT
 GCGATACAAATCCCAAGTTTAGCGGACAGTTCACGCCGGGTTCTAAGAATGTATGCGTCC
diff --git a/ariba/tests/data/clusters_minimap_reads_to_all_refs.out.clstr_count b/ariba/tests/data/clusters_minimap_reads_to_all_refs.out.clstr_count
index 278d834..dc569d7 100644
--- a/ariba/tests/data/clusters_minimap_reads_to_all_refs.out.clstr_count
+++ b/ariba/tests/data/clusters_minimap_reads_to_all_refs.out.clstr_count
@@ -1,2 +1,2 @@
-cluster1	1628	123728
-cluster2	1952	148352
+cluster1	1624	123424
+cluster2	1946	147896
diff --git a/ariba/tests/data/clusters_minimap_reads_to_all_refs.out.hist b/ariba/tests/data/clusters_minimap_reads_to_all_refs.out.hist
index dec7abd..de6fe18 100644
--- a/ariba/tests/data/clusters_minimap_reads_to_all_refs.out.hist
+++ b/ariba/tests/data/clusters_minimap_reads_to_all_refs.out.hist
@@ -1,31 +1,26 @@
-191	1
-194	3
-196	4
-201	3
-202	3
-203	2
-204	4
-209	8
-210	3
+194	2
+204	1
+209	5
+210	2
 211	1
-212	19
-213	30
-214	43
+212	15
+213	29
+214	42
 215	58
-216	83
+216	80
 217	100
-218	116
+218	114
 219	123
-220	151
-221	170
-222	128
+220	150
+221	168
+222	127
 223	144
-224	108
-225	131
+224	105
+225	128
 226	103
-227	79
+227	78
 228	79
-229	45
+229	44
 230	22
 231	16
 232	1
diff --git a/ariba/tests/data/clusters_test_dummy_db.fa.msh b/ariba/tests/data/clusters_test_dummy_db.fa.msh
new file mode 100644
index 0000000..a208432
Binary files /dev/null and b/ariba/tests/data/clusters_test_dummy_db.fa.msh differ
diff --git a/ariba/tests/data/clusters_test_write_report.tsv b/ariba/tests/data/clusters_test_write_report.tsv
index 9851c6a..c348621 100644
--- a/ariba/tests/data/clusters_test_write_report.tsv
+++ b/ariba/tests/data/clusters_test_write_report.tsv
@@ -1,3 +1,3 @@
-#ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
+#ariba_ref_name	ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
 gene1	line1
 gene2	line2
diff --git a/ariba/tests/data/reference_data_init_ok.rename.tsv b/ariba/tests/data/reference_data_init_ok.rename.tsv
new file mode 100644
index 0000000..7f75bb0
--- /dev/null
+++ b/ariba/tests/data/reference_data_init_ok.rename.tsv
@@ -0,0 +1,2 @@
+original_gene1	gene1
+original_gene2	gene2
diff --git a/ariba/tests/data/reference_data_load_rename_file.tsv b/ariba/tests/data/reference_data_load_rename_file.tsv
new file mode 100644
index 0000000..ad2a3e0
--- /dev/null
+++ b/ariba/tests/data/reference_data_load_rename_file.tsv
@@ -0,0 +1,2 @@
+original1	ariba1
+original2	ariba2
diff --git a/ariba/tests/data/report_filter_test_init_bad.tsv b/ariba/tests/data/report_filter_test_init_bad.tsv
index f93b0f5..b29210d 100644
--- a/ariba/tests/data/report_filter_test_init_bad.tsv
+++ b/ariba/tests/data/report_filter_test_init_bad.tsv
@@ -1,4 +1,4 @@
-#ef_name	ref_type	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-cluster1	non_coding	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	1	SNP	n	C42T	0	.	.	42	42	C	142	142	C	500	.	500	a:n:C42T:id1:foo	free_text
-cluster1	non_coding	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	1	SNP	n	A51G	0	.	.	51	51	C	151	151	C	542	.	542	a:n:A51G:id2:bar	free_text2
-cluster2	variants_only	179	20000	cluster2	1042	1042	42.42	cluster2.scaffold.1	1442	1	SNP	p	I42L	1	I42L	NONSYN	112	112	C	442	442	T	300	.	290	a:n:I42L:id3:baz	free_text3
+#ariba_ref_name	ref_name	ref_type	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+ariba_cluster1	cluster1	non_coding	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	1	SNP	n	C42T	0	.	.	42	42	C	142	142	C	500	C	500	a:n:C42T:id1:foo	free_text
+ariba_cluster1	cluster1	non_coding	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	1	SNP	n	A51G	0	.	.	51	51	C	151	151	C	542	C	542	a:n:A51G:id2:bar	free_text2
+ariba_cluster2	cluster2	variants_only	179	20000	cluster2	1042	1042	42.42	cluster2.scaffold.1	1442	1	SNP	p	I42L	1	I42L	NONSYN	112	112	C	442	442	T	300	T	290	a:n:I42L:id3:baz	free_text3
diff --git a/ariba/tests/data/report_filter_test_init_good.tsv b/ariba/tests/data/report_filter_test_init_good.tsv
index 2d67b83..4209e31 100644
--- a/ariba/tests/data/report_filter_test_init_good.tsv
+++ b/ariba/tests/data/report_filter_test_init_good.tsv
@@ -1,5 +1,5 @@
-#ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	10.5	1	SNP	n	C42T	0	.	.	42	42	C	142	142	C	500	.	500	a:n:C42T:id1:foo	free_text
-cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	10.5	1	SNP	n	A51G	0	.	.	51	51	C	151	151	C	542	.	542	a:n:A51G:id2:bar	free_text2
-cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.2	1300	12.4	1	SNP	n	A51G	0	.	.	51	51	C	151	151	C	542	.	542	a:n:A51G:id3:spam	free_text3
-cluster2	1	0	179	20000	cluster2	1042	1042	42.42	cluster2.scaffold.1	1442	20.2	1	SNP	p	I42L	1	I42L	NONSYN	112	112	C	442	442	T	300	.	290	a:v:I42L:id4:eggs	free_text3
+#ariba_ref_name	ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+ariba_cluster1	cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	10.5	1	SNP	n	C42T	0	.	.	42	42	C	142	142	C	500	C	500	a:n:C42T:id1:foo	free_text
+ariba_cluster1	cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	10.5	1	SNP	n	A51G	0	.	.	51	51	C	151	151	C	542	C	542	a:n:A51G:id2:bar	free_text2
+ariba_cluster1	cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.2	1300	12.4	1	SNP	n	A51G	0	.	.	51	51	C	151	151	C	542	C	542	a:n:A51G:id3:spam	free_text3
+ariba_cluster2	cluster2	1	0	179	20000	cluster2	1042	1042	42.42	cluster2.scaffold.1	1442	20.2	1	SNP	p	I42L	1	I42L	NONSYN	112	112	C	442	442	T	300	T	290	a:v:I42L:id4:eggs	free_text3
diff --git a/ariba/tests/data/report_filter_test_load_report_bad.tsv b/ariba/tests/data/report_filter_test_load_report_bad.tsv
index 553e60f..09c1820 100644
--- a/ariba/tests/data/report_filter_test_load_report_bad.tsv
+++ b/ariba/tests/data/report_filter_test_load_report_bad.tsv
@@ -1,4 +1,4 @@
-#ef_name	ref_type	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-cluster1	non_coding	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	1	SNP	n	C42T	0	.	.	42	42	C	142	142	C	500	.	500	a:n:C42T:id1:foo	free_text
-cluster1	non_coding	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	1	SNP	n	A51G	0	.	.	51	51	C	151	151	C	542	.	542	a:n:A51G:id1:bar	free_text2
-cluster2	variants_only	179	20000	cluster2	1042	1042	42.42	cluster2.scaffold.1	1442	1	SNP	p	I42L	1	I42L	NONSYN	112	112	C	442	442	T	300	.	290	a:n:I42L:id1:foo	free_text3
+#ariba_ref_name	ref_name	ref_type	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+ariba_cluster1	cluster1	non_coding	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	1	SNP	n	C42T	0	.	.	42	42	C	142	142	C	500	C	500	a:n:C42T:id1:foo	free_text
+ariba_cluster1	cluster1	non_coding	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	1	SNP	n	A51G	0	.	.	51	51	C	151	151	C	542	C	542	a:n:A51G:id1:bar	free_text2
+ariba_cluster2	cluster2	variants_only	179	20000	cluster2	1042	1042	42.42	cluster2.scaffold.1	1442	1	SNP	p	I42L	1	I42L	NONSYN	112	112	C	442	442	T	300	T	290	a:n:I42L:id1:foo	free_text3
diff --git a/ariba/tests/data/report_filter_test_load_report_good.tsv b/ariba/tests/data/report_filter_test_load_report_good.tsv
index 704b716..9897eb7 100644
--- a/ariba/tests/data/report_filter_test_load_report_good.tsv
+++ b/ariba/tests/data/report_filter_test_load_report_good.tsv
@@ -1,5 +1,5 @@
-#ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	12.2	1	SNP	n	C42T	0	.	.	42	42	C	142	142	C	500	.	500	a:n:C42T:id1:foo	free_text
-cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	12.2	1	SNP	n	A51G	0	.	.	51	51	C	151	151	C	542	.	542	a:n:A51G:id2:bar	free_text2
-cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.2	1300	22.2	1	SNP	n	A51G	0	.	.	51	51	C	151	151	C	542	.	542	a:n:A51G:id3:spam	free_text3
-cluster2	1	1	179	20000	cluster2	1042	1042	42.42	cluster2.scaffold.1	1442	33.3	1	SNP	p	I42L	1	I42L	NONSYN	112	112	C	442	442	T	300	.	290	a:v:I42L:id4:eggs	free_text3
+#ariba_ref_name	ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+ariba_cluster1	cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	12.2	1	SNP	n	C42T	0	.	.	42	42	C	142	142	C	500	C	500	a:n:C42T:id1:foo	free_text
+ariba_cluster1	cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	12.2	1	SNP	n	A51G	0	.	.	51	51	C	151	151	C	542	C	542	a:n:A51G:id2:bar	free_text2
+ariba_cluster1	cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.2	1300	22.2	1	SNP	n	A51G	0	.	.	51	51	C	151	151	C	542	C	542	a:n:A51G:id3:spam	free_text3
+ariba_cluster2	cluster2	1	1	179	20000	cluster2	1042	1042	42.42	cluster2.scaffold.1	1442	33.3	1	SNP	p	I42L	1	I42L	NONSYN	112	112	C	442	442	T	300	T	290	a:v:I42L:id4:eggs	free_text3
diff --git a/ariba/tests/data/report_filter_test_run.expected.tsv b/ariba/tests/data/report_filter_test_run.expected.tsv
index 1bd0ab8..f179170 100644
--- a/ariba/tests/data/report_filter_test_run.expected.tsv
+++ b/ariba/tests/data/report_filter_test_run.expected.tsv
@@ -1,6 +1,6 @@
-#ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	12.4	1	SNP	n	A51G	1	.	.	51	51	C	151	151	C	542	.	542	a:n:A51G:id2:bar	free_text2
-cluster2	1	1	179	20000	cluster2	1042	1042	99.0	cluster2.scaffold.1	1442	13.5	1	SNP	p	I42L	1	I42L	NONSYN	112	112	C	442	442	T	300	.	290	a:n:I42L:id3:baz	free_text3
-cluster4	1	1	179	20000	cluster4	1042	1042	99.0	cluster4.scaffold.1	1442	14.6	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	free_text3
-cluster5	1	0	528	1874	cluster5	1188	1097	92.43	cluster5.scaffold.1	2218	20.0	0	.	p	.	0	E89G	NONSYN	65	265	A;A	766	766	G;C	88;90	.;.	87;90	.	.'
-cluster5	1	0	528	1874	cluster5	1188	1097	92.43	cluster5.scaffold.1	2218	20.0	0	.	p	.	0	Q37fs	FSHIFT	109	109	A	634	634	.	67	.	67	.	.
+#ariba_ref_name	ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+ariba_cluster1	cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	12.4	1	SNP	n	A51G	1	.	.	51	51	C	151	151	C	542	C	542	a:n:A51G:id2:bar	free_text2
+ariba_cluster2	cluster2	1	1	179	20000	cluster2	1042	1042	99.0	cluster2.scaffold.1	1442	13.5	1	SNP	p	I42L	1	I42L	NONSYN	112	112	C	442	442	T	300	T	290	a:n:I42L:id3:baz	free_text3
+ariba_cluster4	cluster4	1	1	179	20000	cluster4	1042	1042	99.0	cluster4.scaffold.1	1442	14.6	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	free_text3
+ariba_cluster5	cluster5	1	0	528	1874	cluster5	1188	1097	92.43	cluster5.scaffold.1	2218	20.0	0	.	p	.	0	E89G	NONSYN	65	265	A;A	766	766	G;C	88;90	G;C	87;90	.	.'
+ariba_cluster5	cluster5	1	0	528	1874	cluster5	1188	1097	92.43	cluster5.scaffold.1	2218	20.0	0	.	p	.	0	Q37fs	FSHIFT	109	109	A	634	634	.	67	.	67	.	.
diff --git a/ariba/tests/data/report_filter_test_run.in.tsv b/ariba/tests/data/report_filter_test_run.in.tsv
index b6e46a4..08d3d93 100644
--- a/ariba/tests/data/report_filter_test_run.in.tsv
+++ b/ariba/tests/data/report_filter_test_run.in.tsv
@@ -1,9 +1,9 @@
-#ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-cluster1	0	0	27	10000	cluster1	1000	0	99.42	cluster1.scaffold.1	1300	12.4	1	SNP	n	C42T	0	.	.	42	42	C	142	142	C	500	.	500	a:n:C42T:id1:foo	free_text
-cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	12.4	1	SNP	n	A51G	1	.	.	51	51	C	151	151	C	542	.	542	a:n:A51G:id2:bar	free_text2
-cluster2	1	1	179	20000	cluster2	1042	1042	99.0	cluster2.scaffold.1	1442	13.5	1	SNP	p	I42L	1	I42L	NONSYN	112	112	C	442	442	T	300	.	290	a:n:I42L:id3:baz	free_text3
-cluster3	1	1	179	20000	cluster3	1042	1042	89.0	cluster2.scaffold.1	1442	13.5	1	SNP	p	I42L	1	I42L	NONSYN	112	112	C	442	442	T	300	.	290	a:n:I42L:id4:spam	free_text3
-cluster4	1	1	179	20000	cluster4	1042	1042	99.0	cluster4.scaffold.1	1442	14.6	1	SNP	p	I42L	1	I42L	SYN	112	112	C	442	442	T	300	.	290	a:n:I42L:id5:eggs	free_text3
-cluster5	1	0	528	1874	cluster5	1188	1097	92.43	cluster5.scaffold.1	2218	20.0	0	.	p	.	0	E89G	NONSYN	65	265	A;A	766	766	G;C	88;90	.;.	87;90	.	.'
-cluster5	1	0	528	1874	cluster5	1188	1097	92.43	cluster5.scaffold.1	2218	20.0	0	.	p	.	0	Q37fs	FSHIFT	109	109	A	634	634	.	67	.	67	.	.
-cluster5	1	0	528	1874	cluster5	1188	1097	92.43	cluster5.scaffold.1	2218	20.0	0	.	p	.	0	E89G	NONSYN	265	265	A;A	766	766	G;C	88;90	.;.	87;90	.	.
+#ariba_ref_name	ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+ariba_cluster1	cluster1	0	0	27	10000	cluster1	1000	0	99.42	cluster1.scaffold.1	1300	12.4	1	SNP	n	C42T	0	.	.	42	42	C	142	142	C	500	C	500	a:n:C42T:id1:foo	free_text
+ariba_cluster1	cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	12.4	1	SNP	n	A51G	1	.	.	51	51	C	151	151	C	542	C	542	a:n:A51G:id2:bar	free_text2
+ariba_cluster2	cluster2	1	1	179	20000	cluster2	1042	1042	99.0	cluster2.scaffold.1	1442	13.5	1	SNP	p	I42L	1	I42L	NONSYN	112	112	C	442	442	T	300	T	290	a:n:I42L:id3:baz	free_text3
+ariba_cluster3	cluster3	1	1	179	20000	cluster3	1042	1042	89.0	cluster2.scaffold.1	1442	13.5	1	SNP	p	I42L	1	I42L	NONSYN	112	112	C	442	442	T	300	T	290	a:n:I42L:id4:spam	free_text3
+ariba_cluster4	cluster4	1	1	179	20000	cluster4	1042	1042	99.0	cluster4.scaffold.1	1442	14.6	1	SNP	p	I42L	1	I42L	SYN	112	112	C	442	442	T	300	T	290	a:n:I42L:id5:eggs	free_text3
+ariba_cluster5	cluster5	1	0	528	1874	cluster5	1188	1097	92.43	cluster5.scaffold.1	2218	20.0	0	.	p	.	0	E89G	NONSYN	65	265	A;A	766	766	G;C	88;90	G;C	87;90	.	.'
+ariba_cluster5	cluster5	1	0	528	1874	cluster5	1188	1097	92.43	cluster5.scaffold.1	2218	20.0	0	.	p	.	0	Q37fs	FSHIFT	109	109	A	634	634	.	67	.	67	.	.
+ariba_cluster5	cluster5	1	0	528	1874	cluster5	1188	1097	92.43	cluster5.scaffold.1	2218	20.0	0	.	p	.	0	E89G	NONSYN	265	265	A;A	766	766	G;C	88;90	G;C	87;90	.	.
diff --git a/ariba/tests/data/report_filter_test_write_report.tsv b/ariba/tests/data/report_filter_test_write_report.tsv
index 1cafa7b..6157897 100644
--- a/ariba/tests/data/report_filter_test_write_report.tsv
+++ b/ariba/tests/data/report_filter_test_write_report.tsv
@@ -1,4 +1,4 @@
-#ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	42.4	1	SNP	n	C42T	0	.	.	42	42	C	142	142	C	500	.	500	a:n:C42T:id1:foo	free_text
-cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	42.4	1	SNP	n	A51G	0	.	.	51	51	C	151	151	C	542	.	542	a:n:A51G:id2:bar	free_text2
-cluster2	1	1	179	20000	cluster2	1042	1042	42.42	cluster2.scaffold.1	1442	42.4	1	SNP	p	I42L	1	I42L	NONSYN	112	112	C	442	442	T	300	.	290	a:v:I42L:id3:baz	free_text3
+#ariba_ref_name	ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+ariba_cluster1	cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	42.4	1	SNP	n	C42T	0	.	.	42	42	C	142	142	C	500	C	500	a:n:C42T:id1:foo	free_text
+ariba_cluster1	cluster1	0	0	27	10000	cluster1	1000	999	99.42	cluster1.scaffold.1	1300	42.4	1	SNP	n	A51G	0	.	.	51	51	C	151	151	C	542	C	542	a:n:A51G:id2:bar	free_text2
+ariba_cluster2	cluster2	1	1	179	20000	cluster2	1042	1042	42.42	cluster2.scaffold.1	1442	42.4	1	SNP	p	I42L	1	I42L	NONSYN	112	112	C	442	442	T	300	T	290	a:v:I42L:id3:baz	free_text3
diff --git a/ariba/tests/data/summary_gather_unfiltered_output_data.in.1.tsv b/ariba/tests/data/summary_gather_unfiltered_output_data.in.1.tsv
index c652f1c..642064f 100644
--- a/ariba/tests/data/summary_gather_unfiltered_output_data.in.1.tsv
+++ b/ariba/tests/data/summary_gather_unfiltered_output_data.in.1.tsv
@@ -1,6 +1,6 @@
-#ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-noncoding_ref1	0	0	19	78	noncoding1	120	120	98.33	noncoding1.scaffold.1	279	10.0	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	17	.	17	noncoding_ref1:0:0:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
-noncoding_ref2	0	0	19	78	noncoding2	120	120	98.33	noncoding2.scaffold.1	279	10.0	1	SNP	n	A42T	1	A42T	SNP	42	42	A	84	84	T	17	.	17	noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report	generic description of noncoding1
-noncoding_ref2	0	0	19	78	noncoding2	120	120	98.33	noncoding2.scaffold.1	279	10.0	1	SNP	n	A52T	1	A52T	SNP	42	42	A	84	84	T	17	G	20,30	noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report	generic description of noncoding1
-presence_absence_ref1	1	0	27	88	presence_absence1	96	96	98.96	presence_absence1.scaffold.1	267	20.1	0	SNP	p	A10V	.	A10V	NONSYN	28	28	C	113	113	T	29	.	29	presence_absence_ref1:1:0:A10V:.:Ref has wild, reads have variant so report	Generic description of presence_absence1
-presence_absence_ref2	1	0	528	232	presence_absence2	1005	554	99.1	presence_absence2.scaffold.1	1032	22.3	0	.	p	.	0	V175L	NONSYN	522	522	G	265	265	C	36	.	36	.	Description foo bar
+#ariba_ref_name	ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+ariba_noncoding_ref1	noncoding_ref1	0	0	19	78	noncoding1	120	120	98.33	noncoding1.scaffold.1	279	10.0	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	17	T	17	noncoding_ref1:0:0:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
+ariba_noncoding_ref2	noncoding_ref2	0	0	19	78	noncoding2	120	120	98.33	noncoding2.scaffold.1	279	10.0	1	SNP	n	A42T	1	A42T	SNP	42	42	A	84	84	T	17	T	17	noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report	generic description of noncoding1
+ariba_noncoding_ref2	noncoding_ref2	0	0	19	78	noncoding2	120	120	98.33	noncoding2.scaffold.1	279	10.0	1	SNP	n	A52T	1	A52T	SNP	42	42	A	84	84	T	17	T,G	20,30	noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report	generic description of noncoding1
+ariba_presence_absence_ref1	presence_absence_ref1	1	0	27	88	presence_absence1	96	96	98.96	presence_absence1.scaffold.1	267	20.1	0	SNP	p	A10V	.	A10V	NONSYN	27	29	GCA	112	114	GTA	29;28;27	G;T;A	29;28;27	presence_absence_ref1:1:0:A10V:.:Ref has wild, reads have variant so report	Generic description of presence_absence1
+ariba_presence_absence_ref2	presence_absence_ref2	1	0	528	232	presence_absence2	1005	554	99.1	presence_absence2.scaffold.1	1032	22.3	0	.	p	.	0	V175L	NONSYN	522	524	GTA	265	267	CTA	36;37;34	C;T;A	36;37;34	.	Description foo bar
diff --git a/ariba/tests/data/summary_gather_unfiltered_output_data.in.2.tsv b/ariba/tests/data/summary_gather_unfiltered_output_data.in.2.tsv
index 4a23ebc..465a64e 100644
--- a/ariba/tests/data/summary_gather_unfiltered_output_data.in.2.tsv
+++ b/ariba/tests/data/summary_gather_unfiltered_output_data.in.2.tsv
@@ -1,6 +1,6 @@
-#ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-noncoding_ref1	0	0	19	78	noncoding1	120	120	98.33	noncoding1.scaffold.1	279	50.1	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	50	G	40,10	noncoding1:0:0:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
-noncoding_ref1	0	0	19	78	noncoding1	120	120	98.33	noncoding1.scaffold.1	279	50.1	1	SNP	n	A6G	1	.	.	6	6	G	77	77	G	18	.	18	noncoding1:0:0:A6G:id3:variant in ref and reads so should report	generic description of noncoding1
-noncoding_ref2	0	0	19	78	noncoding2	120	120	98.33	noncoding2.scaffold.1	279	10.0	1	SNP	n	A52T	1	A52T	SNP	42	42	A	84	84	T	17	G	20,30	noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report	generic description of noncoding1
-presence_absence1	1	0	27	88	presence_absence1	96	96	98.96	presence_absence1.scaffold.1	267	51.1	0	SNP	p	A10V	.	A10V	NONSYN	28	28	C	113	113	T	29	.	29	presence_absence1:1:0:A10V:.:Ref has wild, reads have variant so report	Generic description of presence_absence1
-variants_only1	1	1	64	12	variants_only1	90	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
+#ariba_ref_name	ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+ariba_noncoding_ref1	noncoding_ref1	0	0	19	78	noncoding1	120	120	98.33	noncoding1.scaffold.1	279	50.1	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	50	T,G	40,10	noncoding1:0:0:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
+ariba_noncoding_ref1	noncoding_ref1	0	0	19	78	noncoding1	120	120	98.33	noncoding1.scaffold.1	279	50.1	1	SNP	n	A6G	1	.	.	6	6	G	77	77	G	18	G	18	noncoding1:0:0:A6G:id3:variant in ref and reads so should report	generic description of noncoding1
+ariba_noncoding_ref2	noncoding_ref2	0	0	19	78	noncoding2	120	120	98.33	noncoding2.scaffold.1	279	10.0	1	SNP	n	A52T	1	A52T	SNP	42	42	A	84	84	T	17	T,G	20,30	noncoding_ref2:0:0:A52T:id2:ref has wild type, reads have variant so should report	generic description of noncoding1
+ariba_presence_absence1	presence_absence1	1	0	27	88	presence_absence1	96	96	98.96	presence_absence1.scaffold.1	267	51.1	0	SNP	p	A10V	.	A10V	NONSYN	27	29	GCA	112	114	GTA	29;30;31	G;T;A	29;30;30	presence_absence1:1:0:A10V:.:Ref has wild, reads have variant so report	Generic description of presence_absence1
+ariba_variants_only1	variants_only1	1	1	64	12	variants_only1	90	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
diff --git a/ariba/tests/data/summary_sample_test_column_names_tuples_and_het_snps.tsv b/ariba/tests/data/summary_sample_test_column_names_tuples_and_het_snps.tsv
index 159949c..15ecd21 100644
--- a/ariba/tests/data/summary_sample_test_column_names_tuples_and_het_snps.tsv
+++ b/ariba/tests/data/summary_sample_test_column_names_tuples_and_het_snps.tsv
@@ -1,8 +1,8 @@
-#ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	50	G	40,10	noncoding1:0:0:A14T:.:ref has wild type, reads have variant so should report	generic description of noncoding1
-noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	0	SNP	.	.	.	G15T	SNP	15	15	G	85	85	T	17	.	17	.	generic description of noncoding1
-noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A6G	1	.	.	6	6	G	77	77	G	18	.	18	noncoding1:0:0:A6G:id2:variant in ref and reads so should report	generic description of noncoding1
-noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding2.scaffold.1	279	35.4	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of noncoding2
-presence_absence1	1	0	27	88	cluster.p	96	96	98.96	presence_absence1.scaffold.1	267	35.1	1	SNP	p	A10V	1	A10V	NONSYN	28	28	C	113	113	T	29	.	29	presence_absence1:1:0:A10V:id3:Ref has wild, reads have variant so report	Generic description of presence_absence1
-presence_absence1	1	0	27	88	cluster.p	96	96	98.96	presence_absence1.scaffold.2	267	35.1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	Generic description of presence_absence2
-variants_only1	1	1	27	64	cluster.v	90	90	100.0	variants_only1.scaffold.1	260	42.4	1	SNP	p	S5T	1	.	.	13	15	A;C;C	96	98	A;C;C	12;13;13	.;.;.	12;13;13	variants_only1:1:0:S5T:.:Ref and reads have variant so report	Generic description of variants_only1
+#ariba_ref_name	ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+ariba_noncoding1	noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	50	T,G	40,10	noncoding1:0:0:A14T:.:ref has wild type, reads have variant so should report	generic description of noncoding1
+ariba_noncoding1	noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	0	SNP	.	.	.	G15T	SNP	15	15	G	85	85	T	17	T	17	.	generic description of noncoding1
+ariba_noncoding1	noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A6G	1	.	.	6	6	G	77	77	G	18	G	18	noncoding1:0:0:A6G:id2:variant in ref and reads so should report	generic description of noncoding1
+ariba_noncoding1	noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding2.scaffold.1	279	35.4	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of noncoding2
+ariba_presence_absence1	presence_absence1	1	0	27	88	cluster.p	96	96	98.96	presence_absence1.scaffold.1	267	35.1	1	SNP	p	A10V	1	A10V	NONSYN	27	29	GCA	112	114	GTA	29;30;31	G;T;A	29;30;31	presence_absence1:1:0:A10V:id3:Ref has wild, reads have variant so report	Generic description of presence_absence1
+ariba_presence_absence1	presence_absence1	1	0	27	88	cluster.p	96	96	98.96	presence_absence1.scaffold.2	267	35.1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	Generic description of presence_absence2
+ariba_variants_only1	variants_only1	1	1	27	64	cluster.v	90	90	100.0	variants_only1.scaffold.1	260	42.4	1	SNP	p	S5T	1	.	.	13	15	ACC	96	98	ACC	12;13;13	A;C;C	12;13;13	variants_only1:1:0:S5T:.:Ref and reads have variant so report	Generic description of variants_only1
diff --git a/ariba/tests/data/summary_sample_test_column_summary_data.tsv b/ariba/tests/data/summary_sample_test_column_summary_data.tsv
index 9c495ec..fa56464 100644
--- a/ariba/tests/data/summary_sample_test_column_summary_data.tsv
+++ b/ariba/tests/data/summary_sample_test_column_summary_data.tsv
@@ -1,8 +1,8 @@
-#ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	17	.	17	noncoding1:0:0:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
-noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	0	SNP	.	.	.	G15T	SNP	15	15	G	85	85	T	17	.	17	.	generic description of noncoding1
-noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A6G	1	.	.	6	6	G	77	77	G	18	.	18	noncoding1:0:0:A6G:id2:variant in ref and reads so should report	generic description of noncoding1
-noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding2.scaffold.1	279	35.4	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of noncoding2
-presence_absence1	1	0	27	88	cluster.p	96	96	98.96	presence_absence1.scaffold.1	267	35.1	1	SNP	p	A10V	1	A10V	NONSYN	28	28	C	113	113	T	29	.	29	presence_absence1:1:0:A10V:id3:Ref has wild, reads have variant so report	Generic description of presence_absence1
-presence_absence1	1	0	27	88	cluster.p	96	96	98.96	presence_absence1.scaffold.2	267	35.1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	Generic description of presence_absence2
-variants_only1	1	1	27	64	cluster.v	90	90	100.0	variants_only1.scaffold.1	260	42.4	1	SNP	p	S5T	1	.	.	13	15	A;C;C	96	98	A;C;C	12;13;13	.;.;.	12;13;13	variants_only1:1:0:S5T:id4:Ref and reads have variant so report	Generic description of variants_only1
+#ariba_ref_name	ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+ariba_noncoding1	noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	17	T	17	noncoding1:0:0:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
+ariba_noncoding1	noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	0	SNP	.	.	.	G15T	SNP	15	15	G	85	85	T	17	T	17	.	generic description of noncoding1
+ariba_noncoding1	noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A6G	1	.	.	6	6	G	77	77	G	18	G	18	noncoding1:0:0:A6G:id2:variant in ref and reads so should report	generic description of noncoding1
+ariba_noncoding1	noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding2.scaffold.1	279	35.4	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of noncoding2
+ariba_presence_absence1	presence_absence1	1	0	27	88	cluster.p	96	96	98.96	presence_absence1.scaffold.1	267	35.1	1	SNP	p	A10V	1	A10V	NONSYN	27	29	GCA	112	114	GTA	29;30;31	G;T;A	29;30;31	presence_absence1:1:0:A10V:id3:Ref has wild, reads have variant so report	Generic description of presence_absence1
+ariba_presence_absence1	presence_absence1	1	0	27	88	cluster.p	96	96	98.96	presence_absence1.scaffold.2	267	35.1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	Generic description of presence_absence2
+ariba_variants_only1	variants_only1	1	1	27	64	cluster.v	90	90	100.0	variants_only1.scaffold.1	260	42.4	1	SNP	p	S5T	1	.	.	13	15	ACC	96	98	ACC	12;13;13	A;C;C	12;13;13	variants_only1:1:0:S5T:id4:Ref and reads have variant so report	Generic description of variants_only1
diff --git a/ariba/tests/data/summary_sample_test_load_file.in.tsv b/ariba/tests/data/summary_sample_test_load_file.in.tsv
index a125211..d390d82 100644
--- a/ariba/tests/data/summary_sample_test_load_file.in.tsv
+++ b/ariba/tests/data/summary_sample_test_load_file.in.tsv
@@ -1,7 +1,7 @@
-#ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	17	.	17	noncoding1:n:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
-noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A6G	1	.	.	6	6	G	77	77	G	18	.	18	noncoding1:n:A6G:id2:variant in ref and reads so should report	generic description of noncoding1
-noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding2.scaffold.1	279	35.4	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of noncoding2
-presence_absence1	1	0	27	88	cluster.p	96	96	98.96	presence_absence1.scaffold.1	267	35.1	1	SNP	p	A10V	1	A10V	NONSYN	28	28	C	113	113	T	29	.	29	presence_absence1:p:A10V:id3:Ref has wild, reads have variant so report	Generic description of presence_absence1
-presence_absence1	1	0	27	88	cluster.p	96	96	98.96	presence_absence1.scaffold.2	267	35.1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	Generic description of presence_absence2
-variants_only1	1	1	27	64	cluster.v	90	90	100.0	variants_only1.scaffold.1	260	42.4	1	SNP	p	S5T	1	.	.	13	15	A;C;C	96	98	A;C;C	12;13;13	.;.;.	12;13;13	variants_only1:p:S5T:id4:Ref and reads have variant so report	Generic description of variants_only1
+#ariba_ref_name	ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+ariba_noncoding1	noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	17	T	17	noncoding1:n:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
+ariba_noncoding1	noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A6G	1	.	.	6	6	G	77	77	G	18	G	18	noncoding1:n:A6G:id2:variant in ref and reads so should report	generic description of noncoding1
+ariba_noncoding1	noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding2.scaffold.1	279	35.4	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of noncoding2
+ariba_presence_absence1	presence_absence1	1	0	27	88	cluster.p	96	96	98.96	presence_absence1.scaffold.1	267	35.1	1	SNP	p	A10V	1	A10V	NONSYN	27	29	GCA	112	114	GTA	29;30;31	G;T;A	29;30;31	presence_absence1:p:A10V:id3:Ref has wild, reads have variant so report	Generic description of presence_absence1
+ariba_presence_absence1	presence_absence1	1	0	27	88	cluster.p	96	96	98.96	presence_absence1.scaffold.2	267	35.1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	Generic description of presence_absence2
+ariba_variants_only1	variants_only1	1	1	27	64	cluster.v	90	90	100.0	variants_only1.scaffold.1	260	42.4	1	SNP	p	S5T	1	.	.	13	15	ACC	96	98	ACC	12;13;13	A;C;C	12;13;13	variants_only1:p:S5T:id4:Ref and reads have variant so report	Generic description of variants_only1
diff --git a/ariba/tests/data/summary_sample_test_non_synon_variants.tsv b/ariba/tests/data/summary_sample_test_non_synon_variants.tsv
index b8f5753..c80f2bc 100644
--- a/ariba/tests/data/summary_sample_test_non_synon_variants.tsv
+++ b/ariba/tests/data/summary_sample_test_non_synon_variants.tsv
@@ -1,8 +1,8 @@
-#ref_name	ref_type	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-noncoding1	non_coding	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	17	.	17	noncoding1:n:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
-noncoding1	non_coding	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	0	SNP	.	.	.	G15T	SNP	15	15	G	85	85	T	17	.	17	.	generic description of noncoding1
-noncoding1	non_coding	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A6G	1	.	.	6	6	G	77	77	G	18	.	18	noncoding1:n:A6G:id2:variant in ref and reads so should report	generic description of noncoding1
+#ref_name	ref_type	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+noncoding1	non_coding	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	17	T	17	noncoding1:n:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
+noncoding1	non_coding	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	0	SNP	.	.	.	G15T	SNP	15	15	G	85	85	T	17	T	17	.	generic description of noncoding1
+noncoding1	non_coding	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A6G	1	.	.	6	6	G	77	77	G	18	G	18	noncoding1:n:A6G:id2:variant in ref and reads so should report	generic description of noncoding1
 noncoding1	non_coding	19	78	cluster.n	120	120	98.33	noncoding2.scaffold.1	279	35.4	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of noncoding2
-presence_absence1	presence_absence	27	88	cluster.p	96	96	98.96	presence_absence1.scaffold.1	267	35.1	1	SNP	p	A10V	1	A10V	NONSYN	28	28	C	113	113	T	29	.	29	presence_absence1:p:A10V:id3:Ref has wild, reads have variant so report	Generic description of presence_absence1
+presence_absence1	presence_absence	27	88	cluster.p	96	96	98.96	presence_absence1.scaffold.1	267	35.1	1	SNP	p	A10V	1	A10V	NONSYN	27	29	GCA	112	114	GTA	29;30;31	G;T;A	29;30;31	presence_absence1:p:A10V:id3:Ref has wild, reads have variant so report	Generic description of presence_absence1
 presence_absence1	presence_absence	27	88	cluster.p	96	96	98.96	presence_absence1.scaffold.2	267	35.1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	Generic description of presence_absence2
-variants_only1	variants_only	27	64	cluster.v	90	90	100.0	variants_only1.scaffold.1	260	42.4	1	SNP	p	S5T	1	.	.	13	15	A;C;C	96	98	A;C;C	12;13;13	.;.;.	12;13;13	variants_only1:p:S5T:id4:Ref and reads have variant so report	Generic description of variants_only1
+variants_only1	variants_only	27	64	cluster.v	90	90	100.0	variants_only1.scaffold.1	260	42.4	1	SNP	p	S5T	1	.	.	13	15	ACC	96	98	ACC	12;13;13	A;C;C	12;13;13	variants_only1:p:S5T:id4:Ref and reads have variant so report	Generic description of variants_only1
diff --git a/ariba/tests/data/summary_sample_test_var_groups.tsv b/ariba/tests/data/summary_sample_test_var_groups.tsv
index 3352660..5360e33 100644
--- a/ariba/tests/data/summary_sample_test_var_groups.tsv
+++ b/ariba/tests/data/summary_sample_test_var_groups.tsv
@@ -1,7 +1,7 @@
-#ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	17	.	17	noncoding1:0:0:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
-noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A6G	1	.	.	6	6	G	77	77	G	18	.	18	noncoding1:0:0:A6G:id2:variant in ref and reads so should report	generic description of noncoding1
-noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding2.scaffold.1	279	35.4	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of noncoding2
-presence_absence1	1	0	27	88	cluster.p	96	96	98.96	presence_absence1.scaffold.1	267	35.1	1	SNP	p	A10V	1	A10V	NONSYN	28	28	C	113	113	T	29	.	29	presence_absence1:1:0:A10V:id3:Ref has wild, reads have variant so report	Generic description of presence_absence1
-presence_absence1	1	0	27	88	cluster.p	96	96	98.96	presence_absence1.scaffold.2	267	35.1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	Generic description of presence_absence2
-variants_only1	1	1	27	64	cluster.v	90	90	100.0	variants_only1.scaffold.1	260	42.4	1	SNP	p	S5T	1	.	.	13	15	A;C;C	96	98	A;C;C	12;13;13	.;.;.	12;13;13	variants_only1:1:0:S5T:id4:Ref and reads have variant so report	Generic description of variants_only1
+#ariba_ref_name	ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+ariba_noncoding1	noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	17	T	17	noncoding1:0:0:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
+ariba_noncoding1	noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding1.scaffold.1	279	35.4	1	SNP	n	A6G	1	.	.	6	6	G	77	77	G	18	G	18	noncoding1:0:0:A6G:id2:variant in ref and reads so should report	generic description of noncoding1
+ariba_noncoding1	noncoding1	0	0	19	78	cluster.n	120	120	98.33	noncoding2.scaffold.1	279	35.4	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of noncoding2
+ariba_presence_absence1	presence_absence1	1	0	27	88	cluster.p	96	96	98.96	presence_absence1.scaffold.1	267	35.1	1	SNP	p	A10V	1	A10V	NONSYN	27	29	GCA	112	114	GTA	29;30;31	G;T;A	29;20;31	presence_absence1:1:0:A10V:id3:Ref has wild, reads have variant so report	Generic description of presence_absence1
+ariba_presence_absence1	presence_absence1	1	0	27	88	cluster.p	96	96	98.96	presence_absence1.scaffold.2	267	35.1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	Generic description of presence_absence2
+ariba_variants_only1	variants_only1	1	1	27	64	cluster.v	90	90	100.0	variants_only1.scaffold.1	260	42.4	1	SNP	p	S5T	1	.	.	13	15	ACC	96	98	ACC	12;13;13	A;C;C	12;13;13	variants_only1:1:0:S5T:id4:Ref and reads have variant so report	Generic description of variants_only1
diff --git a/ariba/tests/data/summary_test_load_input_files.1.tsv b/ariba/tests/data/summary_test_load_input_files.1.tsv
index 1b683a4..e0a7d79 100644
--- a/ariba/tests/data/summary_test_load_input_files.1.tsv
+++ b/ariba/tests/data/summary_test_load_input_files.1.tsv
@@ -1,3 +1,3 @@
-#ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-noncoding1	0	0	19	78	noncoding1	120	120	98.33	noncoding1.scaffold.1	279	10.0	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	17	.	17	noncoding1:n:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
-presence_absence1	1	0	27	88	presence_absence1	96	96	98.96	presence_absence1.scaffold.1	267	20.1	1	SNP	p	A10V	1	A10V	NONSYN	28	28	C	113	113	T	29	.	29	presence_absence1:p:A10V:id2:Ref has wild, reads have variant so report	Generic description of presence_absence1
+#ariba_ref_name	ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+ariba_noncoding1	noncoding1	0	0	19	78	noncoding1	120	120	98.33	noncoding1.scaffold.1	279	10.0	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	17	T	17	noncoding1:n:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
+ariba_presence_absence1	presence_absence1	1	0	27	88	presence_absence1	96	96	98.96	presence_absence1.scaffold.1	267	20.1	1	SNP	p	A10V	1	A10V	NONSYN	27	29	GCA	112	114	GTA	29;30;31	G;T;A	29;30;31	presence_absence1:p:A10V:id2:Ref has wild, reads have variant so report	Generic description of presence_absence1
diff --git a/ariba/tests/data/summary_test_load_input_files.2.tsv b/ariba/tests/data/summary_test_load_input_files.2.tsv
index ccaa3d7..541dac5 100644
--- a/ariba/tests/data/summary_test_load_input_files.2.tsv
+++ b/ariba/tests/data/summary_test_load_input_files.2.tsv
@@ -1,5 +1,5 @@
-#ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-noncoding1	0	0	19	78	noncoding1	120	120	98.33	noncoding1.scaffold.1	279	50.1	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	17	.	17	noncoding1:n:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
-noncoding1	0	0	19	78	noncoding1	120	120	98.33	noncoding1.scaffold.1	279	50.1	1	SNP	n	A6G	1	.	.	6	6	G	77	77	G	18	.	18	noncoding1:n:A6G:id2:variant in ref and reads so should report	generic description of noncoding1
-presence_absence1	1	0	27	88	presence_absence1	96	96	98.96	presence_absence1.scaffold.1	267	51.1	1	SNP	p	A10V	1	A10V	NONSYN	28	28	C	113	113	T	29	.	29	presence_absence1:p:A10V:id3:Ref has wild, reads have variant so report	Generic description of presence_absence1
-variants_only1	1	1	64	12	variants_only1	90	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
+#ariba_ref_name	ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+ariba_noncoding1	noncoding1	0	0	19	78	noncoding1	120	120	98.33	noncoding1.scaffold.1	279	50.1	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	17	T	17	noncoding1:n:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
+ariba_noncoding1	noncoding1	0	0	19	78	noncoding1	120	120	98.33	noncoding1.scaffold.1	279	50.1	1	SNP	n	A6G	1	.	.	6	6	G	77	77	G	18	G	18	noncoding1:n:A6G:id2:variant in ref and reads so should report	generic description of noncoding1
+ariba_presence_absence1	presence_absence1	1	0	27	88	presence_absence1	96	96	98.96	presence_absence1.scaffold.1	267	51.1	1	SNP	p	A10V	1	A10V	NONSYN	27	29	GCA	112	114	GTA	29;30;31	G;T;A	29;30;31	presence_absence1:p:A10V:id3:Ref has wild, reads have variant so report	Generic description of presence_absence1
+ariba_variants_only1	variants_only1	1	1	64	12	variants_only1	90	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
diff --git a/ariba/tests/data/summary_test_whole_run.in.1.tsv b/ariba/tests/data/summary_test_whole_run.in.1.tsv
index 4321687..dfc4d2b 100644
--- a/ariba/tests/data/summary_test_whole_run.in.1.tsv
+++ b/ariba/tests/data/summary_test_whole_run.in.1.tsv
@@ -1,16 +1,18 @@
-#ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-noncoding1_ref1	0	0	19	100	noncoding1	100	100	99.1	noncoding1.scaffold.1	150	10.1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of noncoding1
-noncoding2_ref1	0	0	19	100	noncoding2	200	200	98.2	noncoding2.scaffold.1	250	42.42	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of noncoding2
-noncoding5_ref1	0	1	531	100	noncoding5	100	100	97.4	noncoding5.scaffold.1	200	14.1	1	SNP	n	A42T	1	A42T	SNP	42	42	A	50	50	T	40	.	40	noncoding5_ref1:0:1:A42T:.:description of A42T	.
-noncoding6_ref1	0	0	531	100	noncoding6	100	100	95.5	noncoding6.scaffold.1	200	24.32	1	SNP	n	A52T	1	A52T	SNP	52	52	A	70	70	T	100	C	70,30	noncoding6_ref1:0:1:A52T:.:description of A52T	.
-noncoding7_ref1	0	0	531	100	noncoding7	100	100	95.4	noncoding7.scaffold.1	200	24.31	1	SNP	n	A53T	1	A53T	SNP	53	53	A	70	70	T	100	C	70,1	noncoding7_ref1:0:1:A53T:.:description of A53T	.
-noncoding8_ref1	0	0	531	100	noncoding8	100	100	95.3	noncoding8.scaffold.1	200	24.29	1	SNP	n	A54T	0	.	.	54	54	A	70	70	A	100	.	.	noncoding8_ref1:0:1:A54T:.:description of A54T	.
-noncoding9_ref1	0	1	531	100	noncoding9	100	100	95.2	noncoding9.scaffold.1	200	24.28	1	SNP	n	A55T	0	.	.	55	55	A	70	70	A	100	.	.	noncoding9_ref1:0:1:A55T:.:description of A55T	.
-noncoding10_ref1	0	0	531	100	noncoding10	100	100	95.1	noncoding10.scaffold.1	200	24.27	0	.	n	.	0	C100T	SNP	100	100	C	150	150	T	100	A	99,1	.	.
-noncoding11_ref1	0	0	531	100	noncoding11	100	100	95.05	noncoding11.scaffold.1	200	24.26	0	HET	.	.	.	G101A	.	100	100	G	150	150	G	100	A	70,30	.	.
-coding1_ref1	1	0	19	100	coding1	100	100	99.1	coding1.scaffold.1	150	10.1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of coding1
-coding2_ref1	1	0	27	100	coding2	200	200	98.2	coding2.scaffold.1	250	42.42	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of coding2
-coding5_ref1	1	1	539	100	coding5	100	100	97.4	coding5.scaffold.1	200	14.1	1	SNP	p	A42S	0	.	.	142	144	A;G;A	50	52	A;G;A	60;61;62	.;.;.	60;61;62	coding5_ref1:0:1:A42S:.:description of A42S	.
-coding6_ref1	1	0	539	100	coding6	100	100	95.5	coding6.scaffold.1	200	24.32	1	SNP	p	A52S	1	A52S	NONSYN	152	152	A	70	70	T	50	.	50	coding6_ref1:0:1:A52S:.:description of A52S	.
-coding7_ref1	1	1	539	100	coding7	100	100	95.4	coding7.scaffold.1	200	24.32	1	SNP	p	A53S	0	.	.	152	154	A;G;A	70	70	A;G;A	71;72;73	.;.;.	71;72;73	coding7_ref1:0:1:A53S:.:description of A53S	.
-coding8_ref1	1	0	539	100	coding8	100	100	95.3	coding8.scaffold.1	200	24.31	0	.	p	.	0	A53S	NONSYN	160	160	A	75	75	T	100	G	100,1	.	.
+#ariba_ref_name	ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+ariba_noncoding1_ref1	noncoding1_ref1	0	0	19	100	noncoding1	100	100	99.1	noncoding1.scaffold.1	150	10.1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of noncoding1
+ariba_noncoding2_ref1	noncoding2_ref1	0	0	19	100	noncoding2	200	200	98.2	noncoding2.scaffold.1	250	42.42	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of noncoding2
+ariba_noncoding5_ref1	noncoding5_ref1	0	1	531	100	noncoding5	100	100	97.4	noncoding5.scaffold.1	200	14.1	1	SNP	n	A42T	1	A42T	SNP	42	42	A	50	50	T	40	T	40	noncoding5_ref1:0:1:A42T:.:description of A42T	.
+ariba_noncoding6_ref1	noncoding6_ref1	0	0	531	100	noncoding6	100	100	95.5	noncoding6.scaffold.1	200	24.32	1	SNP	n	A52T	1	A52T	SNP	52	52	A	70	70	T	100	T,C	70,30	noncoding6_ref1:0:1:A52T:.:description of A52T	.
+ariba_noncoding7_ref1	noncoding7_ref1	0	0	531	100	noncoding7	100	100	95.4	noncoding7.scaffold.1	200	24.31	1	SNP	n	A53T	1	A53T	SNP	53	53	A	70	70	T	100	T,C	70,1	noncoding7_ref1:0:1:A53T:.:description of A53T	.
+ariba_noncoding8_ref1	noncoding8_ref1	0	0	531	100	noncoding8	100	100	95.3	noncoding8.scaffold.1	200	24.29	1	SNP	n	A54T	0	.	.	54	54	A	70	70	A	100	.	.	noncoding8_ref1:0:1:A54T:.:description of A54T	.
+ariba_noncoding9_ref1	noncoding9_ref1	0	1	531	100	noncoding9	100	100	95.2	noncoding9.scaffold.1	200	24.28	1	SNP	n	A55T	0	.	.	55	55	A	70	70	A	100	.	.	noncoding9_ref1:0:1:A55T:.:description of A55T	.
+ariba_noncoding10_ref1	noncoding10_ref1	0	0	531	100	noncoding10	100	100	95.1	noncoding10.scaffold.1	200	24.27	0	.	n	.	0	C100T	SNP	100	100	C	150	150	T	100	T,A	99,1	.	.
+ariba_noncoding11_ref1	noncoding11_ref1	0	0	531	100	noncoding11	100	100	95.05	noncoding11.scaffold.1	200	24.26	0	HET	.	.	.	G101A	.	100	100	G	150	150	G	100	G,A	70,30	.	.
+ariba_coding1_ref1	coding1_ref1	1	0	19	100	coding1	100	100	99.1	coding1.scaffold.1	150	10.1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of coding1
+ariba_coding2_ref1	coding2_ref1	1	0	27	100	coding2	200	200	98.2	coding2.scaffold.1	250	42.42	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of coding2
+ariba_coding5_ref1	coding5_ref1	1	1	539	100	coding5	100	100	97.4	coding5.scaffold.1	200	14.1	1	SNP	p	A42S	0	.	.	142	144	GCA	50	52	GCA	60;61;62	G;C;A	60;61;62	coding5_ref1:0:1:A42S:.:description of A42S	.
+ariba_coding6_ref1	coding6_ref1	1	0	539	100	coding6	100	100	95.5	coding6.scaffold.1	200	24.32	1	SNP	p	A52S	1	A52S	NONSYN	151	153	GCA	68	70	TCA	50;49;51	T;C;A	50;49;51	coding6_ref1:0:1:A52S:.:description of A52S	.
+ariba_coding7_ref1	coding7_ref1	1	1	539	100	coding7	100	100	95.4	coding7.scaffold.1	200	24.32	1	SNP	p	A53S	0	.	.	154	156	GCA	71	73	GCA	71;72;73	G;C;A	71;72;73	coding7_ref1:0:1:A53S:.:description of A53S	.
+ariba_coding8_ref1	coding8_ref1	1	0	539	100	coding8	100	100	95.3	coding8.scaffold.1	200	24.31	0	.	p	.	0	A53S	NONSYN	160	162	GCA	74	76	TCA	100,1;95,99	T,G;C;A	100,1;95;99	.	.
+ariba_23S.rDNA_WHO_F_01358c	23S.rDNA_WHO_F_01358c	0	1	531	9914	23S	2890	2890	99.86	23S.scaffold.1	3120	744.8	1	SNP	n	C2597T	1	C2597T	SNP	2597	2597	C	2755	2755	T	823	TC,T	487,1	23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T.	High-level resistance to Azithromycin
+ariba_mdfA.3001328.JQ394987.0_1233.561	mdfA.3001328.JQ394987.0_1233.561	1	0	659	336	mdfA	1233	1233	97.0	mdfA.scaffold.1	1464	16.2	0	HET	.	.	.	G261GGGTGTGGTGTGGT,GGGTGTGGT	.	261	261	G	282	282	G	20	GGGTGTGGTGTGGT,GGGTGTGGT	17,2	.	mdfA;Multidrug translocase MdfA
diff --git a/ariba/tests/data/summary_test_whole_run.in.2.tsv b/ariba/tests/data/summary_test_whole_run.in.2.tsv
index b92dd94..c6c4caa 100644
--- a/ariba/tests/data/summary_test_whole_run.in.2.tsv
+++ b/ariba/tests/data/summary_test_whole_run.in.2.tsv
@@ -1,7 +1,8 @@
-#ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-noncoding1_ref2	0	0	19	100	noncoding1	100	100	99.2	noncoding1.scaffold.1	150	10.1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of noncoding1
-noncoding3_ref1	0	0	19	100	noncoding3	242	241	97.6	noncoding3.scaffold.1	300	37.6	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of noncoding3
-noncoding5_ref1	0	1	531	100	noncoding5	100	100	99.42	noncoding5.scaffold.1	200	14.1	1	SNP	n	A42T	0	A42T	SNP	42	42	A	50	50	A	20	.	20	.	.
-coding1_ref2	1	0	27	100	coding1	100	100	99.2	coding1.scaffold.1	150	10.1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of coding1
-coding3_ref1	1	0	27	100	coding3	242	241	97.6	coding3.scaffold.1	300	37.6	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of coding3
-coding5_ref1	1	1	539	100	coding5	100	100	97.4	coding5.scaffold.1	200	14.1	1	SNP	p	A42S	1	A42S	NONSYN	142	144	A	50	50	T	65	.	65	coding5_ref1:0:1:A42S:.:description of A42S	.
+#ariba_ref_name	ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+ariba_noncoding1_ref2	noncoding1_ref2	0	0	19	100	noncoding1	100	100	99.2	noncoding1.scaffold.1	150	10.1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of noncoding1
+ariba_noncoding3_ref1	noncoding3_ref1	0	0	19	100	noncoding3	242	241	97.6	noncoding3.scaffold.1	300	37.6	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of noncoding3
+ariba_noncoding5_ref1	noncoding5_ref1	0	1	531	100	noncoding5	100	100	99.42	noncoding5.scaffold.1	200	14.1	1	SNP	n	A42T	0	A42T	SNP	42	42	A	50	50	A	20	A	20	noncoding5_ref1:0:1:A42T:.:description of A42T	.
+ariba_coding1_ref2	coding1_ref2	1	0	27	100	coding1	100	100	99.2	coding1.scaffold.1	150	10.1	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of coding1
+ariba_coding3_ref1	coding3_ref1	1	0	27	100	coding3	242	241	97.6	coding3.scaffold.1	300	37.6	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	generic description of coding3
+ariba_coding5_ref1	coding5_ref1	1	1	539	100	coding5	100	100	97.4	coding5.scaffold.1	200	14.1	1	SNP	p	A42S	1	A42S	NONSYN	142	144	GCA	50	52	TCA	65;64;63	T;C;A	65;64;63	coding5_ref1:0:1:A42S:.:description of A42S	.
+ariba_23S.rDNA_WHO_F_01358c	23S.rDNA_WHO_F_01358c	0	1	659	4168	23S	2890	2890	99.84	23S.scaffold.1	3628	344.0	1	SNP	n	C2597T	0	.	.	2597	2597	C	2928	2928	C	410	C,T	301,44	23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T. High-level resistance to Azithromycin.	.
diff --git a/ariba/tests/data/summary_test_whole_run.out.csv b/ariba/tests/data/summary_test_whole_run.out.csv
index 9bd78e5..1a282e3 100644
--- a/ariba/tests/data/summary_test_whole_run.out.csv
+++ b/ariba/tests/data/summary_test_whole_run.out.csv
@@ -1,3 +1,3 @@
-name,coding1.assembled,coding1.match,coding1.ref_seq,coding1.pct_id,coding2.assembled,coding2.match,coding2.ref_seq,coding2.pct_id,coding3.assembled,coding3.match,coding3.ref_seq,coding3.pct_id,coding5.assembled,coding5.match,coding5.ref_seq,coding5.pct_id,coding5.known_var,coding5.A42S,coding6.assembled,coding6.match,coding6.ref_seq,coding6.pct_id,coding6.known_var,coding6.A52S,coding7.assembled,coding7.ref_seq,coding7.pct_id,coding8.assembled,coding8.match,coding8.ref_seq,coding8.pct_i [...]
-/home/ubuntu/sanger-pathogens/ariba/ariba/tests/data/summary_test_whole_run.in.1.tsv,interrupted,no,coding1_ref1,99.1,yes,yes,coding2_ref1,98.2,no,no,NA,NA,yes,no,coding5_ref1,97.4,no,no,yes,yes,coding6_ref1,95.5,yes,yes,yes,coding7_ref1,95.4,yes,yes,coding8_ref1,95.3,yes,yes,yes,yes,noncoding1_ref1,99.1,yes,yes,noncoding10_ref1,95.1,yes,yes,99.0,yes,yes,noncoding11_ref1,95.05,yes,het,30.0,yes,yes,noncoding2_ref1,98.2,no,no,NA,NA,yes,yes,noncoding5_ref1,97.4,yes,yes,yes,yes,noncoding6_re [...]
-/home/ubuntu/sanger-pathogens/ariba/ariba/tests/data/summary_test_whole_run.in.2.tsv,yes,yes,coding1_ref2,99.2,no,no,NA,NA,yes,yes,coding3_ref1,97.6,yes,yes,coding5_ref1,97.4,yes,yes,no,no,NA,NA,NA,NA,no,NA,NA,no,no,NA,NA,NA,NA,yes,yes,noncoding1_ref2,99.2,no,no,NA,NA,NA,NA,NA,no,no,NA,NA,NA,NA,NA,no,no,NA,NA,yes,yes,noncoding3_ref1,97.6,yes,no,noncoding5_ref1,99.42,no,no,no,no,NA,NA,NA,NA,NA,no,no,NA,NA,NA,NA,NA,no,no,NA,NA,no,NA,NA
+name,23S.assembled,23S.match,23S.ref_seq,23S.pct_id,23S.known_var,23S.C2597T,23S.C2597T.%,coding1.assembled,coding1.match,coding1.ref_seq,coding1.pct_id,coding2.assembled,coding2.match,coding2.ref_seq,coding2.pct_id,coding3.assembled,coding3.match,coding3.ref_seq,coding3.pct_id,coding5.assembled,coding5.match,coding5.ref_seq,coding5.pct_id,coding5.known_var,coding5.A42S,coding6.assembled,coding6.match,coding6.ref_seq,coding6.pct_id,coding6.known_var,coding6.A52S,coding7.assembled,coding7 [...]
+/nfs/users/nfs_m/mh12/sanger-pathogens/ariba/ariba/tests/data/summary_test_whole_run.in.1.tsv,yes,yes,23S.rDNA_WHO_F_01358c,99.86,yes,yes,100.0,interrupted,no,coding1_ref1,99.1,yes,yes,coding2_ref1,98.2,no,no,NA,NA,yes,no,coding5_ref1,97.4,no,no,yes,yes,coding6_ref1,95.5,yes,yes,yes,coding7_ref1,95.4,yes,yes,coding8_ref1,95.3,yes,yes,interrupted,mdfA.3001328.JQ394987.0_1233.561,97.0,yes,yes,yes,yes,noncoding1_ref1,99.1,yes,yes,noncoding10_ref1,95.1,yes,yes,99.0,yes,yes,noncoding11_ref1,9 [...]
+/nfs/users/nfs_m/mh12/sanger-pathogens/ariba/ariba/tests/data/summary_test_whole_run.in.2.tsv,yes_nonunique,no,23S.rDNA_WHO_F_01358c,99.84,het,het,12.8,yes,yes,coding1_ref2,99.2,no,no,NA,NA,yes,yes,coding3_ref1,97.6,yes,yes,coding5_ref1,97.4,yes,yes,no,no,NA,NA,NA,NA,no,NA,NA,no,no,NA,NA,NA,NA,no,NA,NA,NA,NA,yes,yes,noncoding1_ref2,99.2,no,no,NA,NA,NA,NA,NA,no,no,NA,NA,NA,NA,NA,no,no,NA,NA,yes,yes,noncoding3_ref1,97.6,yes,no,noncoding5_ref1,99.42,no,no,NA,no,no,NA,NA,NA,NA,NA,no,no,NA,NA [...]
diff --git a/ariba/tests/data/summary_to_matrix.1.tsv b/ariba/tests/data/summary_to_matrix.1.tsv
index 1957349..fac2f53 100644
--- a/ariba/tests/data/summary_to_matrix.1.tsv
+++ b/ariba/tests/data/summary_to_matrix.1.tsv
@@ -1,5 +1,5 @@
-#ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-noncoding_ref1	0	0	19	78	noncoding1	120	120	98.33	noncoding1.scaffold.1	279	10.0	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	17	.	17	noncoding_ref1:0:0:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
-noncoding_ref2	0	0	19	78	noncoding2	120	120	98.33	noncoding2.scaffold.1	279	10.0	1	SNP	n	A42T	1	A42T	SNP	42	42	A	84	84	T	17	.	17	noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report	generic description of noncoding1
-noncoding_ref2	0	0	19	78	noncoding2	120	120	98.33	noncoding2.scaffold.1	279	10.0	1	SNP	n	A52T	1	A52T	SNP	42	42	A	84	84	T	17	G	20,30	noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report	generic description of noncoding1
-presence_absence_ref1	1	0	27	88	presence_absence1	96	96	98.96	presence_absence1.scaffold.1	267	20.1	0	SNP	p	A10V	.	A10V	NONSYN	28	28	C	113	113	T	29	.	29	presence_absence_ref1:1:0:A10V:.:Ref has wild, reads have variant so report	Generic description of presence_absence1
+#ariba_ref_name	ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+ariba_noncoding_ref1	noncoding_ref1	0	0	19	78	noncoding1	120	120	98.33	noncoding1.scaffold.1	279	10.0	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	17	T	17	noncoding_ref1:0:0:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
+ariba_noncoding_ref2	noncoding_ref2	0	0	19	78	noncoding2	120	120	98.33	noncoding2.scaffold.1	279	10.0	1	SNP	n	A42T	1	A42T	SNP	42	42	A	84	84	T	17	T	17	noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report	generic description of noncoding1
+ariba_noncoding_ref2	noncoding_ref2	0	0	19	78	noncoding2	120	120	98.33	noncoding2.scaffold.1	279	10.0	1	SNP	n	A52T	1	A52T	SNP	42	42	A	84	84	T	17	T,G	20,30	noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report	generic description of noncoding1
+ariba_presence_absence_ref1	presence_absence_ref1	1	0	27	88	presence_absence1	96	96	98.96	presence_absence1.scaffold.1	267	20.1	0	SNP	p	A10V	.	A10V	NONSYN	27	29	GCA	112	114	GTA	29;28;26	G;T;A	29;28;26	presence_absence_ref1:1:0:A10V:.:Ref has wild, reads have variant so report	Generic description of presence_absence1
diff --git a/ariba/tests/data/summary_to_matrix.2.tsv b/ariba/tests/data/summary_to_matrix.2.tsv
index 4a23ebc..296c84d 100644
--- a/ariba/tests/data/summary_to_matrix.2.tsv
+++ b/ariba/tests/data/summary_to_matrix.2.tsv
@@ -1,6 +1,6 @@
-#ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_alt_nt	smtls_alt_depth	var_description	free_text
-noncoding_ref1	0	0	19	78	noncoding1	120	120	98.33	noncoding1.scaffold.1	279	50.1	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	50	G	40,10	noncoding1:0:0:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
-noncoding_ref1	0	0	19	78	noncoding1	120	120	98.33	noncoding1.scaffold.1	279	50.1	1	SNP	n	A6G	1	.	.	6	6	G	77	77	G	18	.	18	noncoding1:0:0:A6G:id3:variant in ref and reads so should report	generic description of noncoding1
-noncoding_ref2	0	0	19	78	noncoding2	120	120	98.33	noncoding2.scaffold.1	279	10.0	1	SNP	n	A52T	1	A52T	SNP	42	42	A	84	84	T	17	G	20,30	noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report	generic description of noncoding1
-presence_absence1	1	0	27	88	presence_absence1	96	96	98.96	presence_absence1.scaffold.1	267	51.1	0	SNP	p	A10V	.	A10V	NONSYN	28	28	C	113	113	T	29	.	29	presence_absence1:1:0:A10V:.:Ref has wild, reads have variant so report	Generic description of presence_absence1
-variants_only1	1	1	64	12	variants_only1	90	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
+#ariba_ref_name	ref_name	gene	var_only	flag	reads	cluster	ref_len	ref_base_assembled	pc_ident	ctg	ctg_len	ctg_cov	known_var	var_type	var_seq_type	known_var_change	has_known_var	ref_ctg_change	ref_ctg_effect	ref_start	ref_end	ref_nt	ctg_start	ctg_end	ctg_nt	smtls_total_depth	smtls_nts	smtls_nts_depth	var_description	free_text
+ariba_noncoding_ref1	noncoding_ref1	0	0	19	78	noncoding1	120	120	98.33	noncoding1.scaffold.1	279	50.1	1	SNP	n	A14T	1	A14T	SNP	13	13	A	84	84	T	50	T,G	40,10	noncoding1:0:0:A14T:id1:ref has wild type, reads have variant so should report	generic description of noncoding1
+ariba_noncoding_ref1	noncoding_ref1	0	0	19	78	noncoding1	120	120	98.33	noncoding1.scaffold.1	279	50.1	1	SNP	n	A6G	1	.	.	6	6	G	77	77	G	18	G	18	noncoding1:0:0:A6G:id3:variant in ref and reads so should report	generic description of noncoding1
+ariba_noncoding_ref2	noncoding_ref2	0	0	19	78	noncoding2	120	120	98.33	noncoding2.scaffold.1	279	10.0	1	SNP	n	A52T	1	A52T	SNP	42	42	A	84	84	T	17	T,G	20,30	noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report	generic description of noncoding1
+ariba_presence_absence1	presence_absence1	1	0	27	88	presence_absence1	96	96	98.96	presence_absence1.scaffold.1	267	51.1	0	SNP	p	A10V	.	A10V	NONSYN	27	29	GCA	112	114	GTA	29;30;31	G;T;A	29;30;31	presence_absence1:1:0:A10V:.:Ref has wild, reads have variant so report	Generic description of presence_absence1
+ariba_variants_only1	variants_only1	1	1	64	12	variants_only1	90	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.	.
diff --git a/ariba/tests/read_filter_test.py b/ariba/tests/read_filter_test.py
index c705835..75aa870 100644
--- a/ariba/tests/read_filter_test.py
+++ b/ariba/tests/read_filter_test.py
@@ -13,7 +13,9 @@ class TestReadFilter(unittest.TestCase):
         self.external_progs = external_progs.ExternalProgs()
 
 
-    def test_run_cdhit_est_2d(self):
+    # skip this, as no longer using cdhit2d, but leave it here in case we want
+    # to put it back in at a later date
+    def _test_run_cdhit_est_2d(self):
         '''test _run_cdhit_est_2d'''
         reads_in = os.path.join(data_dir, 'read_filter_test_run_cdhit_est_2d.reads.in.fa')
         ref_in = os.path.join(data_dir, 'read_filter_test_run_cdhit_est_2d.ref.in.fa')
@@ -33,7 +35,9 @@ class TestReadFilter(unittest.TestCase):
         self.assertEqual(expected, got)
 
 
-    def test_run(self):
+    # skip this, as no longer using cdhit2d, but leave it here in case we want
+    # to put it back in at a later date
+    def _test_run(self):
         '''test run'''
         rstore_infile = os.path.join(data_dir, 'read_filter_test_run.in.read_store')
         ref_fasta = os.path.join(data_dir, 'read_filter_test_run.in.ref.fa')
diff --git a/ariba/tests/ref_preparer_test.py b/ariba/tests/ref_preparer_test.py
index 00b979d..0e25a4e 100644
--- a/ariba/tests/ref_preparer_test.py
+++ b/ariba/tests/ref_preparer_test.py
@@ -132,6 +132,7 @@ class TestRefPreparer(unittest.TestCase):
             got = os.path.join(tmp_out, filename)
             self.assertTrue(filecmp.cmp(expected, got, shallow=False))
 
+        self.assertTrue(os.path.exists(os.path.join(tmp_out, '02.cdhit.all.fa.msh')))
         shutil.rmtree(tmp_out)
 
 
diff --git a/ariba/tests/reference_data_test.py b/ariba/tests/reference_data_test.py
index ae723b2..02f74bd 100644
--- a/ariba/tests/reference_data_test.py
+++ b/ariba/tests/reference_data_test.py
@@ -52,6 +52,23 @@ class TestReferenceData(unittest.TestCase):
         }
 
         self.assertEqual(expected_seqs_dict, ref_data.sequences)
+        self.assertEqual({}, ref_data.ariba_to_original_name)
+
+        rename_file =  os.path.join(data_dir, 'reference_data_init_ok.rename.tsv')
+        ref_data = reference_data.ReferenceData([fasta_in], [tsv_in], rename_file=rename_file)
+        expected_rename_dict = {'gene1': 'original_gene1', 'gene2': 'original_gene2'}
+        self.assertEqual(expected_rename_dict, ref_data.ariba_to_original_name)
+
+
+    def test_load_rename_file(self):
+        '''Test _load_rename_file'''
+        infile = os.path.join(data_dir, 'reference_data_load_rename_file.tsv')
+        got = reference_data.ReferenceData._load_rename_file(infile)
+        expected = {
+            'ariba1': 'original1',
+            'ariba2': 'original2'
+        }
+        self.assertEqual(expected, got)
 
 
     def test_load_metadata_tsv(self):
diff --git a/ariba/tests/report_filter_test.py b/ariba/tests/report_filter_test.py
index 63c94d8..31ff036 100644
--- a/ariba/tests/report_filter_test.py
+++ b/ariba/tests/report_filter_test.py
@@ -12,10 +12,10 @@ class TestReportFilter(unittest.TestCase):
         '''test __init__ on good input file'''
         infile = os.path.join(data_dir, 'report_filter_test_init_good.tsv')
         rf = report_filter.ReportFilter(infile=infile)
-        line1 = '\t'.join(['cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.1', '1300', '10.5', '1', 'SNP', 'n', 'C42T', '0', '.', '.', '42', '42', 'C', '142', '142', 'C', '500', '.', '500', 'a:n:C42T:id1:foo', 'free_text'])
-        line2 = '\t'.join(['cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.1', '1300', '10.5', '1', 'SNP', 'n', 'A51G', '0', '.', '.', '51', '51', 'C', '151', '151', 'C', '542', '.', '542', 'a:n:A51G:id2:bar', 'free_text2'])
-        line3 = '\t'.join(['cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.2', '1300', '12.4', '1', 'SNP', 'n', 'A51G', '0', '.', '.', '51', '51', 'C', '151', '151', 'C', '542', '.', '542', 'a:n:A51G:id3:spam', 'free_text3'])
-        line4 = '\t'.join(['cluster2', '1', '0', '179', '20000', 'cluster2', '1042', '1042', '42.42', 'cluster2.scaffold.1', '1442', '20.2', '1', 'SNP', 'p', 'I42L', '1', 'I42L', 'NONSYN', '112', '112', 'C', '442', '442', 'T', '300', '.', '290', 'a:v:I42L:id4:eggs', 'free_text3'])
+        line1 = '\t'.join(['ariba_cluster1', 'cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.1', '1300', '10.5', '1', 'SNP', 'n', 'C42T', '0', '.', '.', '42', '42', 'C', '142', '142', 'C', '500', 'C', '500', 'a:n:C42T:id1:foo', 'free_text'])
+        line2 = '\t'.join(['ariba_cluster1', 'cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.1', '1300', '10.5', '1', 'SNP', 'n', 'A51G', '0', '.', '.', '51', '51', 'C', '151', '151', 'C', '542', 'C', '542', 'a:n:A51G:id2:bar', 'free_text2'])
+        line3 = '\t'.join(['ariba_cluster1', 'cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.2', '1300', '12.4', '1', 'SNP', 'n', 'A51G', '0', '.', '.', '51', '51', 'C', '151', '151', 'C', '542', 'C', '542', 'a:n:A51G:id3:spam', 'free_text3'])
+        line4 = '\t'.join(['ariba_cluster2', 'cluster2', '1', '0', '179', '20000', 'cluster2', '1042', '1042', '42.42', 'cluster2.scaffold.1', '1442', '20.2', '1', 'SNP', 'p', 'I42L', '1', 'I42L', 'NONSYN', '112', '112', 'C', '442', '442', 'T', '300', 'T', '290', 'a:v:I42L:id4:eggs', 'free_text3'])
 
         expected = {
             'cluster1': {
@@ -38,8 +38,9 @@ class TestReportFilter(unittest.TestCase):
 
 
     def test_report_line_to_dict(self):
-        line = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t99.42\tcluster1.scaffold.1\t999\t23.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\ta:n:C42T:id1:foo\tfree text'
+        line = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t99.42\tcluster1.scaffold.1\t999\t23.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\ta:n:C42T:id1:foo\tfree text'
         expected = {
+            'ariba_ref_name':     'ariba_cluster1',
             'ref_name':           'cluster1',
             'gene':               '0',
             'var_only':           '0',
@@ -66,8 +67,8 @@ class TestReportFilter(unittest.TestCase):
             'ctg_end':            142,
             'ctg_nt':             'C',
             'smtls_total_depth':  '500',
-            'smtls_alt_nt':       '.',
-            'smtls_alt_depth':    '500',
+            'smtls_nts':       '.',
+            'smtls_nts_depth':    '500',
             'var_description':    'a:n:C42T:id1:foo',
             'free_text':          'free text',
         }
@@ -81,6 +82,7 @@ class TestReportFilter(unittest.TestCase):
     def test_dict_to_report_line(self):
         '''Test _dict_to_report_line'''
         report_dict = {
+            'ariba_ref_name':     'ariba_cluster1',
             'ref_name':           'cluster1',
             'gene':               '0',
             'var_only':           '0',
@@ -107,13 +109,13 @@ class TestReportFilter(unittest.TestCase):
             'ctg_end':            142,
             'ctg_nt':             'C',
             'smtls_total_depth':  '500',
-            'smtls_alt_nt':       '.',
-            'smtls_alt_depth':    '500',
+            'smtls_nts':       '.',
+            'smtls_nts_depth':    '500',
             'var_description':    'a:n:C42T:id1:foo',
             'free_text':          'free text',
         }
 
-        expected = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t99.42\tcluster1.scaffold.1\t1300\t42.4\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\ta:n:C42T:id1:foo\tfree text'
+        expected = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t99.42\tcluster1.scaffold.1\t1300\t42.4\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\ta:n:C42T:id1:foo\tfree text'
         self.assertEqual(expected, report_filter.ReportFilter._dict_to_report_line(report_dict))
 
 
@@ -121,10 +123,10 @@ class TestReportFilter(unittest.TestCase):
         good_infile = os.path.join(data_dir, 'report_filter_test_load_report_good.tsv')
         bad_infile = os.path.join(data_dir, 'report_filter_test_load_report_bad.tsv')
 
-        line1 = '\t'.join(['cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.1', '1300', '12.2', '1', 'SNP', 'n', 'C42T', '0', '.', '.', '42', '42', 'C', '142', '142', 'C', '500', '.', '500', 'a:n:C42T:id1:foo', 'free_text'])
-        line2 = '\t'.join(['cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.1', '1300', '12.2', '1', 'SNP', 'n', 'A51G', '0', '.', '.', '51', '51', 'C', '151', '151', 'C', '542', '.', '542', 'a:n:A51G:id2:bar', 'free_text2'])
-        line3 = '\t'.join(['cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.2', '1300', '22.2', '1', 'SNP', 'n', 'A51G', '0', '.', '.', '51', '51', 'C', '151', '151', 'C', '542', '.', '542', 'a:n:A51G:id3:spam', 'free_text3'])
-        line4 = '\t'.join(['cluster2', '1', '1', '179', '20000', 'cluster2', '1042', '1042', '42.42', 'cluster2.scaffold.1', '1442', '33.3', '1', 'SNP', 'p', 'I42L', '1', 'I42L', 'NONSYN', '112', '112', 'C', '442', '442', 'T', '300', '.', '290', 'a:v:I42L:id4:eggs', 'free_text3'])
+        line1 = '\t'.join(['ariba_cluster1', 'cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.1', '1300', '12.2', '1', 'SNP', 'n', 'C42T', '0', '.', '.', '42', '42', 'C', '142', '142', 'C', '500', 'C', '500', 'a:n:C42T:id1:foo', 'free_text'])
+        line2 = '\t'.join(['ariba_cluster1', 'cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.1', '1300', '12.2', '1', 'SNP', 'n', 'A51G', '0', '.', '.', '51', '51', 'C', '151', '151', 'C', '542', 'C', '542', 'a:n:A51G:id2:bar', 'free_text2'])
+        line3 = '\t'.join(['ariba_cluster1', 'cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.2', '1300', '22.2', '1', 'SNP', 'n', 'A51G', '0', '.', '.', '51', '51', 'C', '151', '151', 'C', '542', 'C', '542', 'a:n:A51G:id3:spam', 'free_text3'])
+        line4 = '\t'.join(['ariba_cluster2', 'cluster2', '1', '1', '179', '20000', 'cluster2', '1042', '1042', '42.42', 'cluster2.scaffold.1', '1442', '33.3', '1', 'SNP', 'p', 'I42L', '1', 'I42L', 'NONSYN', '112', '112', 'C', '442', '442', 'T', '300', 'T', '290', 'a:v:I42L:id4:eggs', 'free_text3'])
 
         expected = {
             'cluster1': {
@@ -181,9 +183,9 @@ class TestReportFilter(unittest.TestCase):
 
     def test_report_dict_passes_essential_filters(self):
         '''Test _report_dict_passes_essential_filters'''
-        line1 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
-        line2 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t0\t98.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
-        line3 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
+        line1 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
+        line2 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t0\t98.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
+        line3 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
         tests = [
             (report_filter.ReportFilter._report_line_to_dict(line1), True),
             (report_filter.ReportFilter._report_line_to_dict(line2), False),
@@ -214,8 +216,8 @@ class TestReportFilter(unittest.TestCase):
     def test_filter_list_of_dicts_all_fail(self):
         '''Test _filter_list_of_dicts where all fail'''
         rf = report_filter.ReportFilter()
-        line1 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t88.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
-        line2 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
+        line1 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t88.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
+        line2 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
         dict1 = report_filter.ReportFilter._report_line_to_dict(line1)
         dict2 = report_filter.ReportFilter._report_line_to_dict(line2)
         got = rf._filter_list_of_dicts([dict1, dict2])
@@ -225,11 +227,11 @@ class TestReportFilter(unittest.TestCase):
     def test_filter_list_of_dicts_with_essential(self):
         '''Test _filter_list_of_dicts with an essential line but all others fail'''
         rf = report_filter.ReportFilter(ignore_not_has_known_variant=True)
-        line1 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
-        line2 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
+        line1 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
+        line2 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
         dict1 = report_filter.ReportFilter._report_line_to_dict(line1)
         dict2 = report_filter.ReportFilter._report_line_to_dict(line2)
-        expected_line = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t400\t12.2\t' + '\t'.join(['.'] * 17) + '\tfree text'
+        expected_line = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t400\t12.2\t' + '\t'.join(['.'] * 17) + '\tfree text'
         expected = [report_filter.ReportFilter._report_line_to_dict(expected_line)]
         assert expected != [None]
         got = rf._filter_list_of_dicts([dict1, dict2])
@@ -239,9 +241,9 @@ class TestReportFilter(unittest.TestCase):
     def test_filter_list_of_dicts_with_pass(self):
         '''Test _filter_list_of_dicts with a line that passes'''
         rf = report_filter.ReportFilter(ignore_not_has_known_variant=True)
-        line1 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
-        line2 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC46T\t1\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C46T\tfree text'
-        line3 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
+        line1 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
+        line2 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC46T\t1\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C46T\tfree text'
+        line3 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
         dict1 = report_filter.ReportFilter._report_line_to_dict(line1)
         dict2 = report_filter.ReportFilter._report_line_to_dict(line2)
         dict3 = report_filter.ReportFilter._report_line_to_dict(line3)
@@ -252,9 +254,9 @@ class TestReportFilter(unittest.TestCase):
     def test_remove_all_after_first_frameshift(self):
         '''Test _remove_all_after_first_frameshift'''
         self.assertEqual([], report_filter.ReportFilter._remove_all_after_first_frameshift([]))
-        line1 = 'cluster1\t1\t0\t528\t1874\tcluster1\t1188\t1097\t92.43\tcluster1.scaffold.1\t2218\t42.42\t0\t.\tp\t.\t0\tE89G\tNONSYN\t65\t265\tA;A\t766\t766\tG;C\t88;90\t.;.\t87;90\t.\t.'
-        line2 = 'cluster1\t1\t0\t528\t1874\tcluster1\t1188\t1097\t92.43\tcluster1.scaffold.1\t2218\t42.42\t0\t.\tp\t.\t0\tQ37fs\tFSHIFT\t109\t109\tA\t634\t634\t.\t67\t.\t67\t.\t.'
-        line3 = 'cluster1\t1\t0\t528\t1874\tcluster1\t1188\t1097\t92.43\tcluster1.scaffold.1\t2218\t42.42\t0\t.\tp\t.\t0\tE89G\tNONSYN\t265\t265\tA;A\t766\t766\tG;C\t88;90\t.;.\t87;90\t.\t.'
+        line1 = 'ariba_cluster1\tcluster1\t1\t0\t528\t1874\tcluster1\t1188\t1097\t92.43\tcluster1.scaffold.1\t2218\t42.42\t0\t.\tp\t.\t0\tE89G\tNONSYN\t65\t265\tA;A\t766\t766\tG;C\t88;90\t.;.\t87;90\t.\t.'
+        line2 = 'ariba_cluster1\tcluster1\t1\t0\t528\t1874\tcluster1\t1188\t1097\t92.43\tcluster1.scaffold.1\t2218\t42.42\t0\t.\tp\t.\t0\tQ37fs\tFSHIFT\t109\t109\tA\t634\t634\t.\t67\t.\t67\t.\t.'
+        line3 = 'ariba_cluster1\tcluster1\t1\t0\t528\t1874\tcluster1\t1188\t1097\t92.43\tcluster1.scaffold.1\t2218\t42.42\t0\t.\tp\t.\t0\tE89G\tNONSYN\t265\t265\tA;A\t766\t766\tG;C\t88;90\t.;.\t87;90\t.\t.'
         dict1 = report_filter.ReportFilter._report_line_to_dict(line1)
         dict2 = report_filter.ReportFilter._report_line_to_dict(line2)
         dict3 = report_filter.ReportFilter._report_line_to_dict(line3)
diff --git a/ariba/tests/samtools_variants_test.py b/ariba/tests/samtools_variants_test.py
index ce8f855..e521c87 100644
--- a/ariba/tests/samtools_variants_test.py
+++ b/ariba/tests/samtools_variants_test.py
@@ -78,10 +78,10 @@ class TestSamtoolsVariants(unittest.TestCase):
         tests = [
             ( ('ref1', 42), None ),
             ( ('ref2', 1), None ),
-            ( ('ref1', 0), ('G', '.', 1, '1') ),
-            ( ('ref1', 2), ('T', 'A', 3, '2,1') ),
-            ( ('ref1', 3), ('C', 'A,G', 42, '21,11,10') ),
-            ( ('ref1', 4), ('C', 'AC', 41, '0,42') )
+            ( ('ref1', 0), ('G', 1, '1') ),
+            ( ('ref1', 2), ('T,A', 3, '2,1') ),
+            ( ('ref1', 3), ('C,A,G', 42, '21,11,10') ),
+            ( ('ref1', 4), ('C,AC', 41, '0,42') )
         ]
 
         for (name, position), expected in tests:
@@ -113,12 +113,12 @@ class TestSamtoolsVariants(unittest.TestCase):
         ]
         expected = {
             '16__cat_2_M35190.scaffold.1': {
-                92: ('T', 'A', 123, '65,58'),
-                179: ('A', 'T', 86, '41,45'),
-                263: ('G', 'C', 97, '53,44'),
+                92: ('T,A',123, '65,58'),
+                179: ('A,T', 86, '41,45'),
+                263: ('G,C', 97, '53,44'),
             },
             '16__cat_2_M35190.scaffold.6': {
-                93: ('T', 'G', 99, '56,43')
+                93: ('T,G', 99, '56,43')
             }
         }
 
@@ -159,9 +159,9 @@ class TestSamtoolsVariants(unittest.TestCase):
         )
         samtools_vars.run()
         tests = [
-            (('ref', 425), ('C', 'T', 31, '18,13')),
-            (('not_a_ref', 10), ('ND', 'ND', 'ND', 'ND')),
-            (('ref', 1000000000), ('ND', 'ND', 'ND', 'ND'))
+            (('ref', 425), ('C,T', 31, '18,13')),
+            (('not_a_ref', 10), ('ND', 'ND', 'ND')),
+            (('ref', 1000000000), ('ND', 'ND', 'ND'))
         ]
         for (ref, pos), expected in tests:
             got = samtools_vars.get_depths_at_position(ref, pos)
diff --git a/ariba/tests/summary_cluster_test.py b/ariba/tests/summary_cluster_test.py
index f5022fc..2cf8f19 100644
--- a/ariba/tests/summary_cluster_test.py
+++ b/ariba/tests/summary_cluster_test.py
@@ -8,9 +8,10 @@ data_dir = os.path.join(modules_dir, 'tests', 'data')
 class TestSummaryCluster(unittest.TestCase):
     def test_line2dict(self):
         '''Test _line2dict'''
-        line = 'refname\t1\t0\t19\t78\tcluster\t120\t120\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:var_group1:ref has wild type, foo bar\tsome free text'
+        line = 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t120\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:var_group1:ref has wild type, foo bar\tsome free text'
 
         expected = {
+            'ariba_ref_name': 'ariba_refname',
             'ref_name': 'refname',
             'gene': '1',
             'var_only' : '0',
@@ -37,8 +38,8 @@ class TestSummaryCluster(unittest.TestCase):
             'ctg_end': 84,
             'ctg_nt': 'T',
             'smtls_total_depth': '17',
-            'smtls_alt_nt': '.',
-            'smtls_alt_depth': '17',
+            'smtls_nts': 'T',
+            'smtls_nts_depth': '17',
             'var_description': 'noncoding1:1:0:A14T:var_group1:ref has wild type, foo bar',
             'var_group': 'var_group1',
             'free_text': 'some free text'
@@ -51,9 +52,9 @@ class TestSummaryCluster(unittest.TestCase):
         '''Test add_data_dict'''
         cluster = summary_cluster.SummaryCluster()
         self.assertTrue(cluster.name is None)
-        line1 = 'refname\t1\t0\t19\t78\tcluster\t120\t120\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
-        line2 = 'refname\t1\t0\t19\t78\tcluster2\t120\t120\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id2:ref has wild type, foo bar\tsome free text'
-        line3 = 'refname2\t1\t0\t19\t78\tcluster\t120\t120\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id3:ref has wild type, foo bar\tsome free text'
+        line1 = 'ariba_refname1\trefname\t1\t0\t19\t78\tcluster\t120\t120\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
+        line2 = 'ariba_refname1\trefname\t1\t0\t19\t78\tcluster2\t120\t120\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id2:ref has wild type, foo bar\tsome free text'
+        line3 = 'ariba_refname2\trefname2\t1\t0\t19\t78\tcluster\t120\t120\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id3:ref has wild type, foo bar\tsome free text'
         data_dict1 = summary_cluster.SummaryCluster.line2dict(line1)
         data_dict2 = summary_cluster.SummaryCluster.line2dict(line2)
         data_dict3 = summary_cluster.SummaryCluster.line2dict(line3)
@@ -67,13 +68,30 @@ class TestSummaryCluster(unittest.TestCase):
             cluster.add_data_dict(data_dict3)
 
 
+    def test_has_any_part_of_ref_assembled(self):
+        '''Test _has_any_part_of_ref_assembled'''
+        line1 = 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t.\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
+        line2 = 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t0\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
+        line3 = 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
+        data_dict1 = summary_cluster.SummaryCluster.line2dict(line1)
+        data_dict2 = summary_cluster.SummaryCluster.line2dict(line2)
+        data_dict3 = summary_cluster.SummaryCluster.line2dict(line3)
+        cluster = summary_cluster.SummaryCluster()
+        cluster.add_data_dict(data_dict1)
+        self.assertFalse(cluster._has_any_part_of_ref_assembled())
+        cluster.add_data_dict(data_dict2)
+        self.assertFalse(cluster._has_any_part_of_ref_assembled())
+        cluster.add_data_dict(data_dict3)
+        self.assertTrue(cluster._has_any_part_of_ref_assembled())
+
+
     def test_pc_id_of_longest(self):
         '''Test pc_id_of_longest'''
         cluster = summary_cluster.SummaryCluster()
         self.assertTrue(cluster.name is None)
-        line1 = 'refname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
-        line2 = 'refname\t1\t0\t19\t78\tcluster\t120\t119\t98.20\tctg_name2\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
-        line3 = 'refname\t1\t0\t19\t78\tcluster\t120\t114\t98.32\tctg_name3\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
+        line1 = 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
+        line2 = 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t119\t98.20\tctg_name2\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
+        line3 = 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t114\t98.32\tctg_name3\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
         data_dict1 = summary_cluster.SummaryCluster.line2dict(line1)
         data_dict2 = summary_cluster.SummaryCluster.line2dict(line2)
         data_dict3 = summary_cluster.SummaryCluster.line2dict(line3)
@@ -85,11 +103,11 @@ class TestSummaryCluster(unittest.TestCase):
 
     def test_to_cluster_summary_number(self):
         '''Test _to_cluster_summary_assembled'''
-        line = 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text'
+        line = 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text'
         data_dict = summary_cluster.SummaryCluster.line2dict(line)
 
         tests = [
-            ('0', 0, 'no'),
+            ('0', 0, 'partial'),
             ('0', 64, 'no'),
             ('0', 1024, 'no'),
             ('0', 1, 'fragmented'),
@@ -99,7 +117,7 @@ class TestSummaryCluster(unittest.TestCase):
             ('0', 51, 'yes_nonunique'),
             ('0', 147, 'yes_nonunique'),
             ('0', 275, 'yes_nonunique'),
-            ('1', 0, 'no'),
+            ('1', 0, 'partial'),
             ('1', 64, 'no'),
             ('1', 1024, 'no'),
             ('1', 1, 'fragmented'),
@@ -117,20 +135,26 @@ class TestSummaryCluster(unittest.TestCase):
             data_dict['flag'] = flag.Flag(f)
             cluster.add_data_dict(data_dict)
             self.assertEqual(expected, cluster._to_cluster_summary_assembled())
+            if expected == 'partial':
+                original_number = cluster.data[0]['ref_base_assembled']
+                cluster.data[0]['ref_base_assembled'] = 0
+                self.assertEqual('no', cluster._to_cluster_summary_assembled())
+                cluster.data[0]['ref_base_assembled'] = original_number
 
 
     def test_has_known_variant(self):
         '''Test _has_known_variant'''
         lines = [
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
-            'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.'
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+            'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+            'ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t0\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.',
         ]
 
         dicts = [summary_cluster.SummaryCluster.line2dict(x) for x in lines]
-        expected = [True, False, False, False, False]
+        expected = ['yes', 'no', 'no', 'no', 'no', 'het']
         assert len(dicts) == len(expected)
 
         for i in range(len(dicts)):
@@ -139,14 +163,15 @@ class TestSummaryCluster(unittest.TestCase):
 
     def test_has_any_known_variant(self):
         lines = [
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
-            'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.'
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+            'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+            'ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t0\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.',
         ]
 
-        expected = ['yes', 'no', 'no', 'no', 'no']
+        expected = ['yes', 'no', 'no', 'no', 'no', 'het']
         assert len(lines) == len(expected)
 
         for i in range(len(lines)):
@@ -159,16 +184,18 @@ class TestSummaryCluster(unittest.TestCase):
     def test_has_nonsynonymous(self):
         '''Test _has_nonsynonymous'''
         lines = [
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
-            'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.'
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+            'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+            'ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t0\tHET\t.\t.\t.\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t.\t.',
+            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA62T\t1\tA62T\tSNP\t62\t62\tA\t84\t84\tA\t40\tA,T\t10,30\tnon_coding1:0:0:A62T:id2:foo_bar\tspam eggs',
         ]
 
         dicts = [summary_cluster.SummaryCluster.line2dict(x) for x in lines]
-        expected = [False, True, False, True, True, True]
+        expected = ['no', 'yes', 'no', 'yes', 'yes', 'yes', 'het', 'het']
         assert len(dicts) == len(expected)
 
         for i in range(len(dicts)):
@@ -178,14 +205,16 @@ class TestSummaryCluster(unittest.TestCase):
     def test_has_any_nonsynonymous(self):
         '''Test _has_any_nonsynonymous'''
         lines = [
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:N_ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\t.\tMULTIPLE\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:N_ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\t.\tMULTIPLE\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t0\tHET\t.\t.\t.\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t.\t.',
+            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA62T\t1\tA62T\tSNP\t62\t62\tA\t84\t84\tA\t40\tA,T\t10,30\tnon_coding1:0:0:A62T:id2:foo_bar\tspam eggs',
         ]
 
-        expected = ['no', 'yes', 'no', 'yes', 'yes']
+        expected = ['no', 'yes', 'no', 'yes', 'yes', 'het', 'het']
         assert len(lines) == len(expected)
 
         for i in range(len(lines)):
@@ -198,32 +227,36 @@ class TestSummaryCluster(unittest.TestCase):
     def test_has_novel_nonsynonymous(self):
         '''Test _has_novel_nonsynonymous'''
         lines = [
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
-            'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.'
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+            'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+            'ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t0\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.',
+            'ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t0\tHET\t.\t.\t.\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t.\t.',
         ]
 
         dicts = [summary_cluster.SummaryCluster.line2dict(x) for x in lines]
-        expected = [False, False, True, True, True]
+        expected = ['no', 'no', 'yes', 'yes', 'yes', 'no', 'het']
         assert len(dicts) == len(expected)
 
-        for i in range(len(dicts)-1):
+        for i in range(len(dicts)):
             self.assertEqual(expected[i], summary_cluster.SummaryCluster._has_novel_nonsynonymous(dicts[i]))
 
 
     def test_has_any_novel_nonsynonymous(self):
         '''Test _has_any_novel_nonsynonymous'''
         lines = [
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
-            'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.'
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\t.\tsome free text',
+            'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+            'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+            'ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t0\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.',
+            'ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t0\tHET\t.\t.\t.\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t.\t.',
         ]
 
-        expected = ['no', 'no', 'yes', 'yes', 'yes']
+        expected = ['no', 'no', 'yes', 'yes', 'yes', 'no', 'het']
         assert len(lines) == len(expected)
 
         for i in range(len(lines)):
@@ -236,11 +269,11 @@ class TestSummaryCluster(unittest.TestCase):
     def test_to_cluster_summary_has_known_nonsynonymous(self):
         '''Test _to_cluster_summary_has_known_nonsynonymous'''
         lines = [
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\t.\tn\t.\t.\t.\tMULTIPLE\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\t.\tn\t.\t.\t.\tMULTIPLE\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
         ]
 
         expected = ['yes', 'yes', 'no', 'no', 'no']
@@ -257,11 +290,11 @@ class TestSummaryCluster(unittest.TestCase):
     def test_to_cluster_summary_has_novel_nonsynonymous(self):
         '''Test _to_cluster_summary_has_novel_nonsynonymous'''
         lines = [
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\t.\tn\t.\t.\t.\tMULTIPLE\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\t.\tn\t.\t.\t.\tMULTIPLE\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
         ]
 
         expected = ['no', 'no', 'no', 'yes', 'yes']
@@ -278,11 +311,11 @@ class TestSummaryCluster(unittest.TestCase):
     def test_to_cluster_summary_has_nonsynonymous(self):
         '''Test _to_cluster_summary_has_nonsynonymous'''
         lines = [
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\t.\tn\t.\t.\t.\tMULTIPLE\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\t.\tn\t.\t.\t.\tMULTIPLE\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
         ]
 
         expected = ['no', 'yes', 'no', 'yes', 'yes']
@@ -309,16 +342,16 @@ class TestSummaryCluster(unittest.TestCase):
         self.assertEqual(None, summary_cluster.SummaryCluster._get_known_noncoding_het_snp(d))
 
         d['ref_ctg_effect'] = 'SNP'
-        d['smtls_alt_nt'] = '.'
+        d['smtls_nts'] = '.'
         self.assertEqual(None, summary_cluster.SummaryCluster._get_known_noncoding_het_snp(d))
 
-        d['smtls_alt_nt'] = 'A;G;T'
+        d['smtls_nts'] = 'A;G;T'
         self.assertEqual(None, summary_cluster.SummaryCluster._get_known_noncoding_het_snp(d))
 
         d['known_var_change'] = 'A42T'
         d['ctg_nt'] = 'A'
-        d['smtls_alt_nt'] = 'T'
-        d['smtls_alt_depth'] = '52,48'
+        d['smtls_nts'] = 'A,T'
+        d['smtls_nts_depth'] = '52,48'
         self.assertEqual(('A42T', 48.0), summary_cluster.SummaryCluster._get_known_noncoding_het_snp(d))
 
 
@@ -326,6 +359,7 @@ class TestSummaryCluster(unittest.TestCase):
         '''Test _get_nonsynonymous_var'''
         d = {
             'ref_name': 'ref',
+            'gene': '1',
             'var_type': '.',
             'known_var_change': '.',
             'has_known_var': '.',
@@ -341,7 +375,7 @@ class TestSummaryCluster(unittest.TestCase):
         d['var_type'] = 'p'
         d['known_var'] = '1'
         d['has_known_var'] = '1'
-        with self.assertRaises(summary_cluster.Error):
+        with self.assertRaises(summary_cluster_variant.Error):
             summary_cluster.SummaryCluster._get_nonsynonymous_var(d)
 
         d['known_var_change'] = 'I42L'
@@ -352,14 +386,14 @@ class TestSummaryCluster(unittest.TestCase):
         d['var_group'] = '.'
 
         d['ref_ctg_change'] = 'P43Q'
-        with self.assertRaises(summary_cluster.Error):
+        with self.assertRaises(summary_cluster_variant.Error):
             summary_cluster.SummaryCluster._get_nonsynonymous_var(d)
 
         d['known_var_change'] = '.'
         self.assertEqual(('ref', 'P43Q', 'novel', None), summary_cluster.SummaryCluster._get_nonsynonymous_var(d))
 
         d['ref_ctg_change'] = '.'
-        with self.assertRaises(summary_cluster.Error):
+        with self.assertRaises(summary_cluster_variant.Error):
             summary_cluster.SummaryCluster._get_nonsynonymous_var(d)
 
         d['ref_ctg_effect'] = 'MULTIPLE'
@@ -369,16 +403,16 @@ class TestSummaryCluster(unittest.TestCase):
     def test_has_match(self):
         '''Test _has_match'''
         lines = [
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:1:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:1:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:1:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:1:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:1:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:1:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:1:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:1:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:1:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:1:A14T:id1:ref has wild type, foo bar\tsome free text',
         ]
 
         expected = ['yes', 'yes', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'no', 'no']
@@ -396,14 +430,14 @@ class TestSummaryCluster(unittest.TestCase):
     def test_has_var_groups(self):
         '''Test has_var_groups'''
         lines = [
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id2:ref has wild type, foo bar\tsome free text',
-            'refname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id3:ref has wild type, foo bar\tsome free text',
-            'refname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id4:ref has wild type, foo bar\tsome free text',
-            'refname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id5:ref has wild type, foo bar\tsome free text',
-            'refname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:1:A14T:id6:ref has wild type, foo bar\tsome free text',
-            'refname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:1:A14T:id7:ref has wild type, foo bar\tsome free text',
-            'refname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:1:A14T:id7:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id2:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id3:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id4:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id5:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:1:A14T:id6:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:1:A14T:id7:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:1:A14T:id7:ref has wild type, foo bar\tsome free text',
         ]
         dicts = [summary_cluster.SummaryCluster.line2dict(line) for line in lines]
         cluster = summary_cluster.SummaryCluster()
@@ -416,8 +450,8 @@ class TestSummaryCluster(unittest.TestCase):
 
     def test_column_summary_data(self):
         '''Test column_summary_data'''
-        line1 = 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnon_coding1:n:A14T:id1:foo_bar\tspam eggs'
-        line2 = 'ref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text'
+        line1 = 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:n:A14T:id1:foo_bar\tspam eggs'
+        line2 = 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text'
 
         data_dict1 = summary_cluster.SummaryCluster.line2dict(line1)
         data_dict2 = summary_cluster.SummaryCluster.line2dict(line2)
@@ -438,8 +472,8 @@ class TestSummaryCluster(unittest.TestCase):
 
     def test_non_synon_variants(self):
         '''Test non_synon_variants'''
-        line1 = 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs'
-        line2 = 'ref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text'
+        line1 = 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs'
+        line2 = 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text'
 
         data_dict1 = summary_cluster.SummaryCluster.line2dict(line1)
         data_dict2 = summary_cluster.SummaryCluster.line2dict(line2)
@@ -454,10 +488,10 @@ class TestSummaryCluster(unittest.TestCase):
     def test_known_noncoding_het_snps(self):
         '''test known_noncoding_het_snps'''
         lines = [
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA42T\t1\tA42T\tSNP\t42\t42\tA\t84\t84\tT\t40\tA\t10,30\tnon_coding1:0:0:A42T:id1:foo_bar\tspam eggs',
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA62T\t1\tA62T\tSNP\t62\t62\tA\t84\t84\tA\t40\tT\t10,30\tnon_coding1:0:0:A62T:id2:foo_bar\tspam eggs',
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tT,G\t10,40,50\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs'
+            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
+            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA42T\t1\tA42T\tSNP\t42\t42\tA\t84\t84\tT\t40\tT,A\t10,30\tnon_coding1:0:0:A42T:id1:foo_bar\tspam eggs',
+            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA62T\t1\tA62T\tSNP\t62\t62\tA\t84\t84\tA\t40\tA,T\t10,30\tnon_coding1:0:0:A62T:id2:foo_bar\tspam eggs',
+            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tA,T,G\t10,40,50\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs'
         ]
 
         cluster = summary_cluster.SummaryCluster()
@@ -466,7 +500,7 @@ class TestSummaryCluster(unittest.TestCase):
         got = cluster.known_noncoding_het_snps()
         expected = {
             '.': {'A82T': 40.0},
-            'id1': {'A42T': 25.0},
+            'id1': {'A42T': 25.0, 'A14T': 100.0},
             'id2': {'A62T': 75.0},
         }
         self.assertEqual(expected, got)
@@ -475,10 +509,10 @@ class TestSummaryCluster(unittest.TestCase):
     def test_get_all_nonsynon_variants_set(self):
         '''test _get_all_nonsynon_variants_set'''
         lines = [
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text',
-            'ref1\t1\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tI14L\t1\tI14L\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnon_coding1:0:0:I14L:.:foo_bar\tspam eggs',
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tA\t10,30\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tA,G\t20,10,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
+            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text',
+            'ariba_ref1\tref1\t1\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tI14L\t1\tI14L\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:I14L:.:foo_bar\tspam eggs',
+            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A\t10,30\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
+            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A,G\t20,10,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
         ]
 
         data_dicts = [summary_cluster.SummaryCluster.line2dict(x) for x in lines]
@@ -492,10 +526,10 @@ class TestSummaryCluster(unittest.TestCase):
     def test_gather_data(self):
         '''test gather_data'''
         lines = [
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text',
-            'ref1\t1\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tI14L\t1\tI14L\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnon_coding1:0:0:I14L:.:foo_bar\tspam eggs',
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tA\t10,30\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tA,G\t20,10,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
+            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text',
+            'ariba_ref1\tref1\t1\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tI14L\t1\tI14L\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:I14L:.:foo_bar\tspam eggs',
+            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A\t10,30\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
+            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A,G\t20,10,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
         ]
 
         data_dicts = [summary_cluster.SummaryCluster.line2dict(x) for x in lines]
diff --git a/ariba/tests/summary_cluster_variant_test.py b/ariba/tests/summary_cluster_variant_test.py
index d2e8377..f88cc0c 100644
--- a/ariba/tests/summary_cluster_variant_test.py
+++ b/ariba/tests/summary_cluster_variant_test.py
@@ -7,12 +7,12 @@ class TestSummaryClusterVariant(unittest.TestCase):
     def test_has_nonsynonymous(self):
         '''Test _has_nonsynonymous'''
         lines = [
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
-            'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
-            'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.'
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+            'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\tC;C\t207;204\t.\t.',
+            'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\tC;C\t207;204\t.\t.'
         ]
 
         dicts = [summary_cluster.SummaryCluster.line2dict(x) for x in lines]
@@ -48,51 +48,50 @@ class TestSummaryClusterVariant(unittest.TestCase):
 
     def  test_get_is_het_and_percent(self):
         '''test _get_is_het_and_percent'''
-        lines = [
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA42T\t1\tA42T\tSNP\t42\t42\tA\t84\t84\tT\t40\tA\t10,30\tnon_coding1:0:0:A42T:id1:foo_bar\tspam eggs',
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA62T\t1\tA62T\tSNP\t62\t62\tA\t84\t84\tA\t40\tT\t10,30\tnon_coding1:0:0:A62T:id2:foo_bar\tspam eggs',
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tT,G\t10,40,50\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs',
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tT\t95,5\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs',
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tT\t90,10\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs',
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tT,C\t90,6,4\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs',
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tT,C\t3,7,90\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs',
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tHET\t.\t.\t.\t.\t.\t.\t.\t.\t84\t84\tA\t50\tT\t40,10\t.\t.'
-        ]
-
-        expected = [
-            (False, None),
-            (True, 25.0),
-            (True, 75.0),
-            (True, 40.0),
-            (False, 5.0),
-            (True, 10.0),
-            (True, 6.0),
-            (True, 7.0),
-            (True, 20.0)
+        tests = [
+            ('ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs', (False, 100.0)),
+            ('ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA42T\t1\tA42T\tSNP\t42\t42\tA\t84\t84\tT\t40\tT,A\t10,30\tnon_coding1:0:0:A42T:id1:foo_bar\tspam eggs', (True, 25.0)),
+            ('ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA62T\t1\tA62T\tSNP\t62\t62\tA\t84\t84\tA\t40\tA,T\t10,30\tnon_coding1:0:0:A62T:id2:foo_bar\tspam eggs', (True, 75.0)),
+            ('ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tA,T,G\t10,40,50\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs', (True, 40.0)),
+            ('ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tA,T\t95,5\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs', (False, 5.0)),
+            ('ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tA,T\t90,10\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs', (True, 10.0)),
+            ('ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tA,T,C\t90,6,4\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs', (True, 6.0)),
+            ('ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tA,T,C\t3,7,90\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs', (True, 7.0)),
+            ('ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tHET\t.\t.\t.\t.\t.\t.\t.\t.\t84\t84\tA\t50\tA,T\t40,10\t.\t.', (True, 20.0)),
+            ('ariba_ref1\t23S.rDNA_WHO_F_01358c\t0\t1\t531\t9914\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3120\t744.8\t1\tSNP\tn\tC2597T\t1\tC2597T\tSNP\t2597\t2597\tC\t2755\t2755\tT\t823\tTC,T\t487,1\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T.\tHigh-level resistance to Azithromycin', (False, 100.0)),
+            ('ariba\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t0\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t90,10\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.', (True, 10.0)),
+            ('ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t0\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t91,9\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.', (False, 9.0)),
+            ('ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t0\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t50,50\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.', (True, 50.0)),
+            ('ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t0\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.', (True, 30.0)),
+            ('ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t1\t.\t.\t2597\t2597\tC\t2928\t2928\tT\t410\tT,C\t91,9\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.', (False, 91.0)),
+            ('ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t1\t.\t.\t2597\t2597\tC\t2928\t2928\tT\t410\tT,C\t90,10\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.', (True, 90.0)),
+            ('ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t1\t.\t.\t2597\t2597\tC\t2928\t2928\tT\t410\tT,C\t50,50\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.', (True, 50.0)),
+            ('ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t1\t.\t.\t2597\t2597\tC\t2928\t2928\tT\t410\tT,C\t10,90\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.', (True, 10.0)),
+            ('ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t1\t.\t.\t2597\t2597\tC\t2928\t2928\tT\t410\tT,C\t9,91\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.', (True, 9.0)),
         ]
-        assert len(lines) == len(expected)
 
-        for i in range(len(lines)):
-            data_dict = summary_cluster.SummaryCluster.line2dict(lines[i])
+        for line, expected in tests:
+            data_dict = summary_cluster.SummaryCluster.line2dict(line)
             got = summary_cluster_variant.SummaryClusterVariant._get_is_het_and_percent(data_dict)
-            self.assertEqual(expected[i], got)
+            self.assertEqual(expected, got)
 
 
     def test_init(self):
         '''test __init__'''
         lines = [
-            'ref1\t1\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tI14L\t1\tI14L\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnon_coding1:0:0:I14L:.:foo_bar\tspam eggs',
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tA\t10,30\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
-            'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tA,G\t20,10,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
+            'ariba_ref1\tref1\t1\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tI14L\t1\tI14L\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:I14L:.:foo_bar\tspam eggs',
+            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
+            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A\t10,30\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
+            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A,G\t20,10,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
+            'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\t.\t.\t13\t13\tA\t84\t84\tA\t100\tA,T\t90,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
         ]
 
         expected = [
             {'coding': True, 'known': True, 'var_string': 'I14L', 'var_group': '.', 'het_percent': None},
-            {'coding': False, 'known': True, 'var_string': 'A14T', 'var_group': 'id1', 'het_percent': None},
+            {'coding': False, 'known': True, 'var_string': 'A14T', 'var_group': 'id1', 'het_percent': 100.0},
             {'coding': False, 'known': True, 'var_string': 'A14T', 'var_group': 'id1', 'het_percent': 25.0},
             {'coding': False, 'known': True, 'var_string': 'A14T', 'var_group': 'id1', 'het_percent': 50.0},
+            {'coding': False, 'known': True, 'var_string': 'A14T', 'var_group': 'id1', 'het_percent': 10.0},
         ]
         assert len(lines) == len(expected)
 
diff --git a/ariba/tests/summary_test.py b/ariba/tests/summary_test.py
index 280b301..e54a0fd 100644
--- a/ariba/tests/summary_test.py
+++ b/ariba/tests/summary_test.py
@@ -65,7 +65,7 @@ class TestSummary(unittest.TestCase):
 
 
     def test_gather_unfiltered_output_data(self):
-        '''test gather_output_rows_new'''
+        '''test gather_unfiltered_output_data'''
         infiles = [
             os.path.join(data_dir, 'summary_gather_unfiltered_output_data.in.1.tsv'),
             os.path.join(data_dir, 'summary_gather_unfiltered_output_data.in.2.tsv')
@@ -111,12 +111,12 @@ class TestSummary(unittest.TestCase):
                 },
                 'presence_absence2': {
                     'summary': {
-                            'assembled': 'no',
-                            'known_var': 'NA',
+                            'assembled': 'partial',
+                            'known_var': 'no',
                             'match': 'no',
-                            'novel_var': 'NA',
-                            'pct_id': 'NA',
-                            'ref_seq': 'NA'
+                            'novel_var': 'yes',
+                            'pct_id': '99.1',
+                            'ref_seq': 'presence_absence_ref2'
                     },
                     'groups': {},
                     'vars': {}
@@ -212,17 +212,18 @@ class TestSummary(unittest.TestCase):
             }
         }
 
+        self.maxDiff = None
         s = summary.Summary('out', filenames=infiles)
         s.samples = summary.Summary._load_input_files(infiles, 90)
         s._gather_unfiltered_output_data()
         self.assertEqual(expected_potential_cols, s.all_potential_columns)
         self.assertEqual(expected_all, s.all_data)
 
-        expected_potential_cols['noncoding1']['groups'] = {'id3', 'id1', 'id1.%'}
+        expected_potential_cols['noncoding1']['groups'] = {'id3', 'id1', 'id1.%', 'id3.%'}
         expected_potential_cols['noncoding2']['groups'] = {'id2.%', 'id2'}
-        expected_all[infiles[0]]['noncoding1']['groups'] = {'id1': 'yes'}
+        expected_all[infiles[0]]['noncoding1']['groups'] = {'id1': 'yes', 'id1.%': 100.0}
         expected_all[infiles[0]]['noncoding2']['groups'] = {'id2': 'yes_multi_het', 'id2.%': 'NA'}
-        expected_all[infiles[1]]['noncoding1']['groups'] = {'id1': 'het', 'id1.%': 80.0, 'id3': 'yes'}
+        expected_all[infiles[1]]['noncoding1']['groups'] = {'id1': 'het', 'id1.%': 80.0, 'id3': 'yes', 'id3.%': 100.0}
         expected_all[infiles[1]]['noncoding2']['groups'] = {'id2': 'het', 'id2.%': 40.0}
         s = summary.Summary('out', filenames=infiles, show_var_groups=True)
         s.samples = summary.Summary._load_input_files(infiles, 90)
@@ -230,12 +231,12 @@ class TestSummary(unittest.TestCase):
         self.assertEqual(expected_potential_cols, s.all_potential_columns)
         self.assertEqual(expected_all, s.all_data)
 
-        expected_potential_cols['noncoding1']['vars'] = {'A14T.%', 'A6G', 'A14T'}
-        expected_potential_cols['noncoding2']['vars'] = {'A52T', 'A52T.%', 'A42T'}
+        expected_potential_cols['noncoding1']['vars'] = {'A14T.%', 'A6G', 'A6G.%', 'A14T'}
+        expected_potential_cols['noncoding2']['vars'] = {'A52T', 'A52T.%', 'A42T', 'A42T.%'}
 
-        expected_all[infiles[0]]['noncoding1']['vars'] = {'A14T': 'yes'}
-        expected_all[infiles[0]]['noncoding2']['vars'] = {'A42T': 'yes', 'A52T': 'het', 'A52T.%': 40.0}
-        expected_all[infiles[1]]['noncoding1']['vars'] = {'A14T': 'het', 'A14T.%': 80.0, 'A6G': 'yes'}
+        expected_all[infiles[0]]['noncoding1']['vars'] = {'A14T': 'yes', 'A14T.%': 100.0}
+        expected_all[infiles[0]]['noncoding2']['vars'] = {'A42T': 'yes', 'A42T.%': 100.0, 'A52T': 'het', 'A52T.%': 40.0}
+        expected_all[infiles[1]]['noncoding1']['vars'] = {'A14T': 'het', 'A14T.%': 80.0, 'A6G': 'yes', 'A6G.%': 100.0}
         expected_all[infiles[1]]['noncoding2']['vars'] = {'A52T': 'het', 'A52T.%': 40.0}
         s = summary.Summary('out', filenames=infiles, show_var_groups=True, show_known_vars=True)
         s.samples = summary.Summary._load_input_files(infiles, 90)
@@ -244,7 +245,9 @@ class TestSummary(unittest.TestCase):
         self.assertEqual(expected_all, s.all_data)
 
         expected_potential_cols['presence_absence1']['vars'] = {'A10V'}
+        expected_potential_cols['presence_absence2']['vars'] = {'V175L'}
         expected_all[infiles[0]]['presence_absence1']['vars'] = {'A10V': 'yes'}
+        expected_all[infiles[0]]['presence_absence2']['vars'] = {'V175L': 'yes'}
         expected_all[infiles[1]]['presence_absence1']['vars'] = {'A10V': 'yes'}
         s = summary.Summary('out', filenames=infiles, show_var_groups=True, show_known_vars=True, show_novel_vars=True)
         s.samples = summary.Summary._load_input_files(infiles, 90)
@@ -265,11 +268,11 @@ class TestSummary(unittest.TestCase):
         s._gather_unfiltered_output_data()
         got_phandango_header, got_csv_header, got_matrix = summary.Summary._to_matrix(infiles, s.all_data, s.all_potential_columns, s.cluster_columns)
 
-        expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding1.id1:o1', 'noncoding1.id1.%:c2', 'noncoding1.id3:o1', 'noncoding1.A14T:o1', 'noncoding1.A14T.%:c2', 'noncoding1.A6G:o1', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1', 'noncodin [...]
-        expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding1.id1', 'noncoding1.id1.%', 'noncoding1.id3', 'noncoding1.A14T', 'noncoding1.A14T.%', 'noncoding1.A6G', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var', 'noncoding2.id2', 'noncoding2.id2.%', 'noncoding2.A42T', 'noncoding2. [...]
+        expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding1.id1:o1', 'noncoding1.id1.%:c2', 'noncoding1.id3:o1', 'noncoding1.id3.%:c2', 'noncoding1.A14T:o1', 'noncoding1.A14T.%:c2', 'noncoding1.A6G:o1', 'noncoding1.A6G.%:c2', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.known [...]
+        expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding1.id1', 'noncoding1.id1.%', 'noncoding1.id3', 'noncoding1.id3.%', 'noncoding1.A14T', 'noncoding1.A14T.%', 'noncoding1.A6G', 'noncoding1.A6G.%', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var', 'noncoding2.id2', 'noncoding2 [...]
         expected_matrix = [
-            [infiles[0], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'yes', 'NA', 'no', 'yes', 'NA', 'no', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'yes_multi_het', 'NA', 'yes', 'het', 40.0, 'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes', 'yes'],
-            [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'het', 80.0, 'yes', 'het', 80.0, 'yes', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'het', 40.0, 'no', 'het', 40.0, 'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes', 'yes']
+            [infiles[0], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'yes', 100.0, 'no', 'NA', 'yes', 100.0, 'no', 'NA', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'yes_multi_het', 'NA', 'yes', 100.0, 'het', 40.0, 'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes', 'yes'],
+            [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'het', 80.0, 'yes', 100.0, 'het', 80.0, 'yes', 100.0, 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'het', 40.0, 'no', 'NA', 'het', 40.0, 'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes', 'yes']
         ]
 
         self.assertEqual(expected_phandango_header, got_phandango_header)
@@ -289,11 +292,11 @@ class TestSummary(unittest.TestCase):
         s._gather_unfiltered_output_data()
         got_phandango_header, got_csv_header, got_matrix = summary.Summary._to_matrix(infiles, s.all_data, s.all_potential_columns, s.cluster_columns)
 
-        expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding1.id1:o1', 'noncoding1.id1.%:c2', 'noncoding1.id3:o1', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1', 'noncoding2.id2:o1', 'noncoding2.id2.%:c2', 'presence_absence1.assembled:o1' [...]
-        expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding1.id1', 'noncoding1.id1.%', 'noncoding1.id3', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var', 'noncoding2.id2', 'noncoding2.id2.%', 'presence_absence1.assembled', 'presence_absence1.match', 'presence_absence1.ref_seq', 'p [...]
+        expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding1.id1:o1', 'noncoding1.id1.%:c2', 'noncoding1.id3:o1', 'noncoding1.id3.%:c2', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1', 'noncoding2.id2:o1', 'noncoding2.id2.%:c2', 'presence [...]
+        expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding1.id1', 'noncoding1.id1.%', 'noncoding1.id3', 'noncoding1.id3.%', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var', 'noncoding2.id2', 'noncoding2.id2.%', 'presence_absence1.assembled', 'presence_absence1.match', 'presence_a [...]
         expected_matrix = [
-            [infiles[0], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'yes', 'NA', 'no', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'yes_multi_het', 'NA', 'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes'],
-            [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'het', 80.0, 'yes', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'het', 40.0, 'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes']
+            [infiles[0], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'yes', 100.0, 'no', 'NA', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'yes_multi_het', 'NA', 'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes'],
+            [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'het', 80.0, 'yes', 100.0, 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'het', 40.0, 'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes']
         ]
 
         self.assertEqual(expected_phandango_header, got_phandango_header)
@@ -313,11 +316,11 @@ class TestSummary(unittest.TestCase):
         s._gather_unfiltered_output_data()
         got_phandango_header, got_csv_header, got_matrix = summary.Summary._to_matrix(infiles, s.all_data, s.all_potential_columns, s.cluster_columns)
 
-        expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding1.A14T:o1', 'noncoding1.A14T.%:c2', 'noncoding1.A6G:o1', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1', 'noncoding2.A42T:o1', 'noncoding2.A52T:o1', 'noncoding2.A52T.%:c2', 'prese [...]
-        expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding1.A14T', 'noncoding1.A14T.%', 'noncoding1.A6G', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var', 'noncoding2.A42T', 'noncoding2.A52T', 'noncoding2.A52T.%', 'presence_absence1.assembled', 'presence_absence1.match', 'presenc [...]
+        expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding1.A14T:o1', 'noncoding1.A14T.%:c2', 'noncoding1.A6G:o1', 'noncoding1.A6G.%:c2', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1', 'noncoding2.A42T:o1', 'noncoding2.A42T.%:c2', 'nonc [...]
+        expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding1.A14T', 'noncoding1.A14T.%', 'noncoding1.A6G', 'noncoding1.A6G.%', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var', 'noncoding2.A42T', 'noncoding2.A42T.%', 'noncoding2.A52T', 'noncoding2.A52T.%', 'presence_absence1.assemb [...]
         expected_matrix = [
-            [infiles[0], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'yes', 'NA', 'no', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'yes', 'het', 40.0, 'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes', 'yes'],
-            [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'het', 80.0, 'yes', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'no', 'het', 40.0, 'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes', 'yes']
+            [infiles[0], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'yes', 100.0, 'no', 'NA', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'yes', 100.0, 'het', 40.0, 'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes', 'yes'],
+            [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'het', 80.0, 'yes', 100.0, 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'no', 'NA', 'het', 40.0, 'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes', 'yes']
         ]
 
         self.assertEqual(expected_phandango_header, got_phandango_header)
@@ -559,6 +562,7 @@ class TestSummary(unittest.TestCase):
             expected = [line.rstrip().split(',', maxsplit=1)[1] for line in f]
         with open(tmp_out + '.csv') as f:
             got = [line.rstrip().split(',', maxsplit=1)[1] for line in f]
+
         self.assertEqual(expected, got)
         os.unlink(tmp_out + '.csv')
         os.unlink(tmp_out + '.phandango.csv')
diff --git a/scripts/ariba b/scripts/ariba
index a9287ba..381c69c 100755
--- a/scripts/ariba
+++ b/scripts/ariba
@@ -49,6 +49,7 @@ subparser_getref = subparsers.add_parser(
     usage='ariba getref [options] <db> <outprefix>',
     description='Download reference data from one of a few supported public resources',
 )
+subparser_getref.add_argument('--debug', action='store_true', help='Do not delete temporary downloaded files')
 subparser_getref.add_argument('--version', help='Version of reference data to download. If not used, gets the latest version. Only applies to card')
 subparser_getref.add_argument('db', help='Database to download. Must be one of: ' + ' '.join(allowed_dbs), choices=allowed_dbs, metavar="DB name")
 subparser_getref.add_argument('outprefix', help='Prefix of output filenames')
@@ -75,12 +76,13 @@ cdhit_group.add_argument('--cdhit_clusters', help='File specifying how the seque
 cdhit_group.add_argument('--cdhit_min_id', type=float, help='Sequence identity threshold (cd-hit option -c) [%(default)s]', default=0.9, metavar='FLOAT')
 cdhit_group.add_argument('--cdhit_min_length', type=float, help='length difference cutoff (cd-hit option -s) [%(default)s]', default=0.9, metavar='FLOAT')
 
-other_group = subparser_prepareref.add_argument_group('other options')
-other_group.add_argument('--min_gene_length', type=int, help='Minimum allowed length in nucleotides of reference genes [%(default)s]', metavar='INT', default=6)
-other_group.add_argument('--max_gene_length', type=int, help='Maximum allowed length in nucleotides of reference genes [%(default)s]', metavar='INT', default=10000)
-other_group.add_argument('--genetic_code', type=int, help='Number of genetic code to use. Currently supported 1,4,11 [%(default)s]', choices=[1,4,11], default=11, metavar='INT')
-other_group.add_argument('--threads', type=int, help='Number of threads (currently only applies to cdhit) [%(default)s]', default=1, metavar='INT')
-other_group.add_argument('--verbose', action='store_true', help='Be verbose')
+other_prep_group = subparser_prepareref.add_argument_group('other options')
+other_prep_group.add_argument('--min_gene_length', type=int, help='Minimum allowed length in nucleotides of reference genes [%(default)s]', metavar='INT', default=6)
+other_prep_group.add_argument('--max_gene_length', type=int, help='Maximum allowed length in nucleotides of reference genes [%(default)s]', metavar='INT', default=10000)
+other_prep_group.add_argument('--genetic_code', type=int, help='Number of genetic code to use. Currently supported 1,4,11 [%(default)s]', choices=[1,4,11], default=11, metavar='INT')
+other_prep_group.add_argument('--force', action='store_true', help='Overwrite output directory, if it already exists')
+other_prep_group.add_argument('--threads', type=int, help='Number of threads (currently only applies to cdhit) [%(default)s]', default=1, metavar='INT')
+other_prep_group.add_argument('--verbose', action='store_true', help='Be verbose')
 
 subparser_prepareref.add_argument('outdir', help='Output directory (must not already exist)')
 subparser_prepareref.set_defaults(func=ariba.tasks.prepareref.run)
@@ -101,20 +103,20 @@ subparser_refquery.set_defaults(func=ariba.tasks.refquery.run)
 
 
 #----------------------------- reportfilter -------------------------------
-subparser_reportfilter = subparsers.add_parser(
-    'reportfilter',
-    help='Filters a report tsv file',
-    description='Filters an ARIBA report tsv file made by "ariba run"',
-    usage='ariba reportfilter [options] <infile> <outfile>'
-)
-subparser_reportfilter.add_argument('--exclude_flags', help='Comma-separated list of flags to exclude. [%(default)s]', default='assembly_fail,ref_seq_choose_fail')
-subparser_reportfilter.add_argument('--min_pc_id', type=float, help='Minimum percent identity of nucmer match between contig and reference [%(default)s]', default=90.0, metavar='FLOAT')
-subparser_reportfilter.add_argument('--min_ref_base_asm', type=int, help='Minimum number of reference bases matching assembly [%(default)s]', default=1, metavar='INT')
-subparser_reportfilter.add_argument('--keep_syn', action='store_true', help='Keep synonymous variants (by default they are removed')
-subparser_reportfilter.add_argument('--discard_without_known_var', action='store_true', help='Applies to variant only genes. Filter out where there is a known variant, but the assembly has the wild type. By default these rows are kept.')
-subparser_reportfilter.add_argument('infile', help='Name of input tsv file')
-subparser_reportfilter.add_argument('outfile', help='Name of output tsv file')
-subparser_reportfilter.set_defaults(func=ariba.tasks.reportfilter.run)
+#subparser_reportfilter = subparsers.add_parser(
+#    'reportfilter',
+#    help='Filters a report tsv file',
+#    description='Filters an ARIBA report tsv file made by "ariba run"',
+#    usage='ariba reportfilter [options] <infile> <outfile>'
+#)
+#subparser_reportfilter.add_argument('--exclude_flags', help='Comma-separated list of flags to exclude. [%(default)s]', default='assembly_fail,ref_seq_choose_fail')
+#subparser_reportfilter.add_argument('--min_pc_id', type=float, help='Minimum percent identity of nucmer match between contig and reference [%(default)s]', default=90.0, metavar='FLOAT')
+#subparser_reportfilter.add_argument('--min_ref_base_asm', type=int, help='Minimum number of reference bases matching assembly [%(default)s]', default=1, metavar='INT')
+#subparser_reportfilter.add_argument('--keep_syn', action='store_true', help='Keep synonymous variants (by default they are removed')
+#subparser_reportfilter.add_argument('--discard_without_known_var', action='store_true', help='Applies to variant only genes. Filter out where there is a known variant, but the assembly has the wild type. By default these rows are kept.')
+#subparser_reportfilter.add_argument('infile', help='Name of input tsv file')
+#subparser_reportfilter.add_argument('outfile', help='Name of output tsv file')
+#subparser_reportfilter.set_defaults(func=ariba.tasks.reportfilter.run)
 
 
 #----------------------------- run -------------------------------
@@ -139,15 +141,16 @@ assembly_group = subparser_run.add_argument_group('Assembly options')
 assembly_group.add_argument('--assembly_cov', type=int, help='Target read coverage when sampling reads for assembly [%(default)s]', default=50, metavar='INT')
 assembly_group.add_argument('--min_scaff_depth', type=int, help='Minimum number of read pairs needed as evidence for scaffold link between two contigs [%(default)s]', default=10, metavar='INT')
 
-other_group = subparser_run.add_argument_group('Other options')
-#other_group.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
-other_group.add_argument('--threads', type=int, help=argparse.SUPPRESS, default=1, metavar='INT')
-other_group.add_argument('--assembled_threshold', type=float, help='If proportion of gene assembled (regardless of into how many contigs) is at least this value then the flag gene_assembled is set [%(default)s]', default=0.95, metavar='FLOAT (between 0 and 1)')
-other_group.add_argument('--gene_nt_extend', type=int, help='Max number of nucleotides to extend ends of gene matches to look for start/stop codons [%(default)s]', default=30, metavar='INT')
-other_group.add_argument('--unique_threshold', type=float, help='If proportion of bases in gene assembled more than once is <= this value, then the flag unique_contig is set [%(default)s]', default=0.03, metavar='FLOAT (between 0 and 1)')
-other_group.add_argument('--noclean', action='store_true', help='Do not clean up intermediate files')
-other_group.add_argument('--tmp_dir', help='Existing directory in which to create a temporary directory used for local assemblies')
-other_group.add_argument('--verbose', action='store_true', help='Be verbose')
+other_run_group = subparser_run.add_argument_group('Other options')
+#other_run_group.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
+other_run_group.add_argument('--threads', type=int, help=argparse.SUPPRESS, default=1, metavar='INT')
+other_run_group.add_argument('--assembled_threshold', type=float, help='If proportion of gene assembled (regardless of into how many contigs) is at least this value then the flag gene_assembled is set [%(default)s]', default=0.95, metavar='FLOAT (between 0 and 1)')
+other_run_group.add_argument('--gene_nt_extend', type=int, help='Max number of nucleotides to extend ends of gene matches to look for start/stop codons [%(default)s]', default=30, metavar='INT')
+other_run_group.add_argument('--unique_threshold', type=float, help='If proportion of bases in gene assembled more than once is <= this value, then the flag unique_contig is set [%(default)s]', default=0.03, metavar='FLOAT (between 0 and 1)')
+other_run_group.add_argument('--force', action='store_true', help='Overwrite output directory, if it already exists')
+other_run_group.add_argument('--noclean', action='store_true', help='Do not clean up intermediate files')
+other_run_group.add_argument('--tmp_dir', help='Existing directory in which to create a temporary directory used for local assemblies')
+other_run_group.add_argument('--verbose', action='store_true', help='Be verbose')
 subparser_run.set_defaults(func=ariba.tasks.run.run)
 
 
diff --git a/setup.py b/setup.py
index 416b0f4..dc7533f 100644
--- a/setup.py
+++ b/setup.py
@@ -55,7 +55,7 @@ vcfcall_mod = Extension(
 setup(
     ext_modules=[minimap_mod, fermilite_mod, vcfcall_mod],
     name='ariba',
-    version='2.2.5',
+    version='2.3.0',
     description='ARIBA: Antibiotic Resistance Identification By Assembly',
     packages = find_packages(),
     package_data={'ariba': ['test_run_data/*']},

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/ariba.git



More information about the debian-med-commit mailing list