[med-svn] [ariba] 01/02: New upstream version 2.3.0+ds
Sascha Steinbiss
satta at debian.org
Wed Oct 12 16:10:35 UTC 2016
This is an automated email from the git hooks/post-receive script.
satta pushed a commit to branch master
in repository ariba.
commit 4c3626816798ca60cc56e5726399173a3f152d11
Author: Sascha Steinbiss <satta at debian.org>
Date: Wed Oct 12 16:00:03 2016 +0000
New upstream version 2.3.0+ds
---
Dockerfile | 19 +
README.md | 38 +-
ariba/assembly.py | 18 +-
ariba/cluster.py | 24 +-
ariba/clusters.py | 15 +-
ariba/ext/minimap_ariba.cpp | 51 +-
ariba/external_progs.py | 6 +-
ariba/mash.py | 15 +-
ariba/ref_genes_getter.py | 43 +-
ariba/ref_preparer.py | 13 +-
ariba/reference_data.py | 18 +-
ariba/report.py | 188 ++++---
ariba/samtools_variants.py | 5 +-
ariba/summary.py | 20 +-
ariba/summary_cluster.py | 101 ++--
ariba/summary_cluster_variant.py | 67 ++-
ariba/tasks/getref.py | 3 +-
ariba/tasks/prepareref.py | 1 +
ariba/tasks/run.py | 4 +
ariba/test_run_data/metadata.tsv | 1 +
ariba/test_run_data/reads_1.fq | 4 +
ariba/test_run_data/reads_2.fq | 4 +
.../test_run_data/ref_fasta_to_make_reads_from.fa | 5 +
ariba/test_run_data/ref_seqs.fa | 5 +
ariba/tests/assembly_test.py | 14 +-
ariba/tests/cluster_test.py | 133 +++--
ariba/tests/clusters_test.py | 11 +
.../data/cluster_test_full_run_delete_codon.fa | 3 +
.../data/cluster_test_full_run_delete_codon.tsv | 1 +
.../for_reads.fa | 20 +
.../cluster_test_full_run_delete_codon/reads_1.fq | 588 ++++++++++++++++++++
.../cluster_test_full_run_delete_codon/reads_2.fq | 588 ++++++++++++++++++++
.../references.fa | 3 +
.../data/cluster_test_full_run_insert_codon.fa | 3 +
.../data/cluster_test_full_run_insert_codon.tsv | 1 +
.../for_reads.fa | 20 +
.../cluster_test_full_run_insert_codon/reads_1.fq | 592 +++++++++++++++++++++
.../cluster_test_full_run_insert_codon/reads_2.fq | 592 +++++++++++++++++++++
.../references.fa | 3 +
.../data/cluster_test_full_run_multiple_vars.fa | 5 +
.../data/cluster_test_full_run_multiple_vars.tsv | 2 +
.../for_reads.fa | 20 +
.../cluster_test_full_run_multiple_vars/reads_1.fq | 584 ++++++++++++++++++++
.../cluster_test_full_run_multiple_vars/reads_2.fq | 584 ++++++++++++++++++++
.../references.fa | 5 +
.../data/cluster_test_full_run_ok_non_coding.fa | 2 +-
...cluster_test_full_run_ref_not_in_cluster.in.fa} | 2 +-
...cluster_test_full_run_ref_not_in_cluster.in.tsv | 6 +
...uster_test_full_run_ref_not_in_cluster.mash.fa} | 7 +-
...er_test_full_run_ref_not_in_cluster.mash.fa.msh | Bin 0 -> 2760 bytes
.../reads_1.fq | 144 +++++
.../reads_2.fq | 144 +++++
.../references.fa} | 2 +-
...sters_minimap_reads_to_all_refs.out.clstr_count | 4 +-
.../clusters_minimap_reads_to_all_refs.out.hist | 37 +-
ariba/tests/data/clusters_test_dummy_db.fa.msh | Bin 0 -> 504 bytes
ariba/tests/data/clusters_test_write_report.tsv | 2 +-
ariba/tests/data/reference_data_init_ok.rename.tsv | 2 +
.../tests/data/reference_data_load_rename_file.tsv | 2 +
ariba/tests/data/report_filter_test_init_bad.tsv | 8 +-
ariba/tests/data/report_filter_test_init_good.tsv | 10 +-
.../data/report_filter_test_load_report_bad.tsv | 8 +-
.../data/report_filter_test_load_report_good.tsv | 10 +-
.../tests/data/report_filter_test_run.expected.tsv | 12 +-
ariba/tests/data/report_filter_test_run.in.tsv | 18 +-
.../tests/data/report_filter_test_write_report.tsv | 8 +-
.../summary_gather_unfiltered_output_data.in.1.tsv | 12 +-
.../summary_gather_unfiltered_output_data.in.2.tsv | 12 +-
...ample_test_column_names_tuples_and_het_snps.tsv | 16 +-
.../summary_sample_test_column_summary_data.tsv | 16 +-
.../data/summary_sample_test_load_file.in.tsv | 14 +-
.../summary_sample_test_non_synon_variants.tsv | 12 +-
.../tests/data/summary_sample_test_var_groups.tsv | 14 +-
.../tests/data/summary_test_load_input_files.1.tsv | 6 +-
.../tests/data/summary_test_load_input_files.2.tsv | 10 +-
ariba/tests/data/summary_test_whole_run.in.1.tsv | 34 +-
ariba/tests/data/summary_test_whole_run.in.2.tsv | 15 +-
ariba/tests/data/summary_test_whole_run.out.csv | 6 +-
ariba/tests/data/summary_to_matrix.1.tsv | 10 +-
ariba/tests/data/summary_to_matrix.2.tsv | 12 +-
ariba/tests/read_filter_test.py | 8 +-
ariba/tests/ref_preparer_test.py | 1 +
ariba/tests/reference_data_test.py | 17 +
ariba/tests/report_filter_test.py | 58 +-
ariba/tests/samtools_variants_test.py | 22 +-
ariba/tests/summary_cluster_test.py | 248 +++++----
ariba/tests/summary_cluster_variant_test.py | 73 ++-
ariba/tests/summary_test.py | 56 +-
scripts/ariba | 61 ++-
setup.py | 2 +-
90 files changed, 4981 insertions(+), 622 deletions(-)
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..82d13c8
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,19 @@
+#
+# This container will install ARIBA from master
+#
+FROM debian:testing
+
+#
+# Authorship
+#
+MAINTAINER ap13 at sanger.ac.uk
+
+#
+# Install the dependencies
+#
+RUN apt-get update -qq && apt-get install -y git bowtie2 cd-hit fastaq libc6 libfml0 libgcc1 libminimap0 libstdc++6 mash mummer python3 python3-setuptools python3-dev python3-pysam python3-pymummer python3-dendropy gcc g++ zlib1g-dev
+
+#
+# Get the latest code from github and install
+#
+RUN git clone https://github.com/sanger-pathogens/ariba.git && cd ariba && python3 setup.py install
diff --git a/README.md b/README.md
index 9b8dd54..bc52e03 100644
--- a/README.md
+++ b/README.md
@@ -45,6 +45,21 @@ If the tests all pass, install:
python3 setup.py install
+### Docker
+ARIBA can be run in a Docker container. First of all install Docker, then to install ARIBA run:
+
+ docker pull sangerpathogens/ariba
+
+To use ARIBA you would use a command such as this (substituting in your directories), where your files are assumed to be stored in /home/ubuntu/data:
+
+ docker run --rm -it -v /home/ubuntu/data:/data sangerpathogens/ariba ariba -h
+
+
+### Debian (testing)
+ARIBA is available in the latest version of Debian, and over time will progressively filter through to Ubuntu and other distributions which use Debian. To install it as root:
+
+ sudo apt-get install ariba
+
### Dependencies and environment variables
@@ -59,11 +74,10 @@ to the following dependencies.
|----------------|------------------------|---------------------------|
| Bowtie2 | `bowtie2` | `$ARIBA_BOWTIE2` |
| CD-HIT (est) | `cd-hit-est` | `$ARIBA_CDHIT` |
-| CD-HIT (est-2d)| `cd-hit-est-2d` | `$ARIBA_CDHIT2D` |
| MASH | `mash` | `$ARIBA_MASH` |
-For example, you could specify an exact version of a Samtools executable
+For example, you could specify an exact version of a bowtie2 executable
that you compiled and downloaded in your home directory (assuming BASH):
export ARIBA_BOWTIE2=$HOME/bowtie2-2.1.0/bowtie2
@@ -82,16 +96,18 @@ are put in a temporary directory made by ARIBA. The total size of these
files is small, but there can be many of them. This can be a
problem when running large numbers (100s or 1000s) of jobs simultaneously
on the same file system.
-By default, ARIBA creates a temporary directory for these files
-inside the output directory of each run.
+The parent directory of the temporary directory is determined in the
+following order of precedence:
+
+1. The value of the option `--tmp_dir` (if that option was used)
+2. The environment variable `$ARIBA_TMPDIR` (if it is set)
+3. The environment variable `$TMPDIR` (if it is set)
+4. If none of the above is found, then use the run's output directory.
Each temporary directory
is unique to one run of ARIBA, and is automatically deleted at the end
of the run (even if ARIBA was killed by the user or crashed).
-The parent directory of the temporary
-directory can be changed using the environment variable
-`$ARIBA_TMPDIR`. The temporary directory for each run will be made
-inside `$ARIBA_TMPDIR`. For example,
+For example,
export ARIBA_TMPDIR=/tmp
@@ -103,12 +119,6 @@ will have a name of the form
where the suffix `abcdef` is a random string of characters, chosen
such that `/tmp/ariba.tmp.abcdef` does not already exist.
-The temporary directory can also be changed using the option
-`--tmp_dir` when running `ariba run`. Using this option takes precedence
-over the environment variable `$ARIBA_TMPDIR`. If neither are
-set, then ARIBA creates the temporary directory inside
-the output directory given to `ariba run`.
-
The exception to the above is if the option `--noclean` is used.
This forces the temporary directory to be placed in the output
directory, and temporary files are kept. It is intended for
diff --git a/ariba/assembly.py b/ariba/assembly.py
index 88fd1c3..87c2273 100644
--- a/ariba/assembly.py
+++ b/ariba/assembly.py
@@ -18,6 +18,7 @@ class Assembly:
final_assembly_fa,
final_assembly_bam,
log_fh,
+ mash_reference_fasta,
scaff_name_prefix='scaffold',
kmer=0,
assembler='fermilite',
@@ -42,6 +43,7 @@ class Assembly:
self.final_assembly_fa = os.path.abspath(final_assembly_fa)
self.final_assembly_bam = os.path.abspath(final_assembly_bam)
self.log_fh = log_fh
+ self.mash_reference_fasta = os.path.abspath(mash_reference_fasta)
self.scaff_name_prefix = scaff_name_prefix
self.ref_seq_name = None
@@ -377,14 +379,26 @@ class Assembly:
self.log_fh = None
return
- masher = mash.Masher(self.ref_fastas, self.gapfilled_length_filtered, self.log_fh, self.extern_progs)
+ masher = mash.Masher(self.mash_reference_fasta, self.gapfilled_length_filtered, self.log_fh, self.extern_progs)
self.ref_seq_name = masher.run(self.mash_dist_file)
if self.ref_seq_name is None:
print('Could not determine closest reference sequence', file=self.log_fh)
self.log_fh = None
return
- faidx.write_fa_subset({self.ref_seq_name}, self.ref_fastas, self.ref_fasta)
+ file_reader = pyfastaq.sequences.file_reader(self.ref_fastas)
+ for ref_seq in file_reader:
+ if self.ref_seq_name == ref_seq.id:
+ f_out = pyfastaq.utils.open_file_write(self.ref_fasta)
+ print(ref_seq, file=f_out)
+ pyfastaq.utils.close(f_out)
+ break
+ else:
+ print('Closest reference sequence ', self.ref_seq_name, ' does not belong to this cluster', file=self.log_fh)
+ self.ref_seq_name = None
+ self.log_fh = None
+ return
+
print('Closest reference sequence according to mash: ', self.ref_seq_name, file=self.log_fh)
contigs_both_strands = self._fix_contig_orientation(self.gapfilled_length_filtered, self.ref_fasta, self.final_assembly_fa, min_id=self.nucmer_min_id, min_length=self.nucmer_min_len, breaklen=self.nucmer_breaklen)
diff --git a/ariba/cluster.py b/ariba/cluster.py
index e5a73b0..efdc2cc 100644
--- a/ariba/cluster.py
+++ b/ariba/cluster.py
@@ -1,4 +1,5 @@
import signal
+import traceback
import os
import atexit
import random
@@ -6,7 +7,7 @@ import math
import shutil
import sys
import pyfastaq
-from ariba import assembly, assembly_compare, assembly_variants, external_progs, flag, mapping, read_filter, report, samtools_variants
+from ariba import assembly, assembly_compare, assembly_variants, external_progs, flag, mapping, mash, report, samtools_variants
class Error (Exception): pass
@@ -17,6 +18,7 @@ class Cluster:
root_dir,
name,
refdata,
+ refdata_seqs_fasta_for_mash=None,
total_reads=None,
total_reads_bases=None,
fail_file=None,
@@ -126,6 +128,13 @@ class Cluster:
else:
self.extern_progs = extern_progs
+ if refdata_seqs_fasta_for_mash is None:
+ mash.Masher.sketch(self.references_fa, True, self.extern_progs, verbose=False)
+ self.refdata_seqs_fasta_for_mash = self.references_fa
+ else:
+ self.refdata_seqs_fasta_for_mash = os.path.abspath(refdata_seqs_fasta_for_mash)
+ assert os.path.exists(self.refdata_seqs_fasta_for_mash + '.msh')
+
self.random_seed = random_seed
wanted_signals = [signal.SIGABRT, signal.SIGINT, signal.SIGSEGV, signal.SIGTERM]
for s in wanted_signals:
@@ -174,9 +183,7 @@ class Cluster:
self.refdata.write_seqs_to_fasta(self.references_fa, self.reference_names)
self.log_fh = pyfastaq.utils.open_file_write(self.logfile)
- self.read_store.get_reads(self.name, self.all_reads1, self.all_reads2)
- rfilter = read_filter.ReadFilter(self.read_store, self.references_fa, self.name, self.log_fh, self.extern_progs)
- self.total_reads, self.total_reads_bases = rfilter.run(self.all_reads1, self.all_reads2)
+ self.total_reads, self.total_reads_bases = self.read_store.get_reads(self.name, self.all_reads1, self.all_reads2, log_fh=self.log_fh)
self.refdata.write_seqs_to_fasta(self.references_fa, self.reference_names)
self.longest_ref_length = max([len(self.refdata.sequence(name)) for name in self.reference_names])
@@ -314,6 +321,7 @@ class Cluster:
self.final_assembly_fa,
self.final_assembly_bam,
self.log_fh,
+ self.refdata_seqs_fasta_for_mash,
scaff_name_prefix=self.name,
kmer=self.assembly_kmer,
assembler=self.assembler,
@@ -420,8 +428,12 @@ class Cluster:
print('\nCould not get closest reference sequence\n', file=self.log_fh, flush=True)
self.status_flag.add('ref_seq_choose_fail')
+ try:
+ self.report_lines = report.report_lines(self)
+ except:
+ print('Error making report for cluster ', self.name, '... traceback:', file=sys.stderr)
+ traceback.print_exc(file=sys.stderr)
+ raise Error('Error making report for cluster ' + self.name)
- print('\nMaking report lines', file=self.log_fh, flush=True)
- self.report_lines = report.report_lines(self)
self._clean()
atexit.unregister(self._atexit)
diff --git a/ariba/clusters.py b/ariba/clusters.py
index 1052f6f..f2eac5a 100644
--- a/ariba/clusters.py
+++ b/ariba/clusters.py
@@ -76,6 +76,9 @@ class Clusters:
self.extern_progs = extern_progs
self.clusters_tsv = os.path.abspath(os.path.join(refdata_dir, '02.cdhit.clusters.tsv'))
self.all_ref_seqs_fasta = os.path.abspath(os.path.join(refdata_dir, '02.cdhit.all.fa'))
+ mash_file = self.all_ref_seqs_fasta + '.msh'
+ if not os.path.exists(mash_file):
+ raise Error('Error! Mash file ' + mash_file + ' not found.\nThe likely cause is that prepareref was run using an old version of ariba.\nIf this is the case, please rerun ariba preparef.')
if version_report_lines is None:
self.version_report_lines = []
@@ -95,8 +98,7 @@ class Clusters:
self.cdhit_cluster_representatives_fa = self.cdhit_files_prefix + '.cluster_representatives.fa'
self.bam_prefix = os.path.join(self.outdir, 'map_reads_to_cluster_reps')
self.bam = self.bam_prefix + '.bam'
- self.report_file_all_tsv = os.path.join(self.outdir, 'report.all.tsv')
- self.report_file_all_xls = os.path.join(self.outdir, 'report.all.xls')
+ self.report_file_all_tsv = os.path.join(self.outdir, 'debug.report.tsv')
self.report_file_filtered = os.path.join(self.outdir, 'report.tsv')
self.catted_assembled_seqs_fasta = os.path.join(self.outdir, 'assembled_seqs.fa.gz')
self.catted_genes_matching_refs_fasta = os.path.join(self.outdir, 'assembled_genes.fa.gz')
@@ -138,6 +140,8 @@ class Clusters:
if tmp_dir is None:
if 'ARIBA_TMPDIR' in os.environ:
tmp_dir = os.path.abspath(os.environ['ARIBA_TMPDIR'])
+ elif 'TMPDIR' in os.environ:
+ tmp_dir = os.path.abspath(os.environ['TMPDIR'])
else:
tmp_dir = self.outdir
@@ -372,6 +376,12 @@ class Clusters:
for cluster_name in sorted(self.cluster_to_dir):
counter += 1
+
+ if self.cluster_read_counts[cluster_name] <= 2:
+ if self.verbose:
+ print('Not constructing cluster ', cluster_name, ' because it only has ', self.cluster_read_counts[cluster_name], ' reads (', counter, ' of ', len(self.cluster_to_dir), ')', sep='')
+ continue
+
if self.verbose:
print('Constructing cluster ', cluster_name, ' (', counter, ' of ', len(self.cluster_to_dir), ')', sep='')
new_dir = self.cluster_to_dir[cluster_name]
@@ -381,6 +391,7 @@ class Clusters:
new_dir,
cluster_name,
self.refdata,
+ refdata_seqs_fasta_for_mash=self.all_ref_seqs_fasta,
fail_file=os.path.join(self.fails_dir, cluster_name),
read_store=self.read_store,
reference_names=self.cluster_ids[cluster_name],
diff --git a/ariba/ext/minimap_ariba.cpp b/ariba/ext/minimap_ariba.cpp
index 612945e..91fd82b 100644
--- a/ariba/ext/minimap_ariba.cpp
+++ b/ariba/ext/minimap_ariba.cpp
@@ -23,6 +23,7 @@ void chooseCluster(std::string outfile, std::map<std::string, uint64_t>& refname
void writeClusterCountsFile(std::string outfile, const std::map<std::string, uint64_t>& readCounters, const std::map<std::string, uint64_t>& baseCounters);
void writeInsertHistogramFile(std::string outfile, const std::map<uint32_t, uint32_t>& insertHist);
void writeProperPairsFile(std::string outfile, uint32_t properPairs);
+bool readMappingOk(const mm_reg1_t* r, const mm_idx_t* mi, const kseq_t *ks1, uint32_t endTolerance);
int run_minimap(char *clustersFileIn, char *refFileIn, char *readsFile1In, char *readsFile2In, char *outprefixIn);
@@ -143,18 +144,24 @@ int run_minimap(char *clustersFileIn, char *refFileIn, char *readsFile1In, char
for (j =0; j < n_reg1; ++j)
{
const mm_reg1_t *r = &reg1[j];
- refnames.insert(mi->name[r->rid]);
- refnameToScore[mi->name[r->rid]] += r->cnt;
- uint32_t coord = r->rev ? std::max(r->rs, r->re) : std::min(r->rs, r->re);
- positions1[mi->name[r->rid]].push_back(std::make_pair(coord, r->rev));
+ if (readMappingOk(r, mi, ks1, (int) 1.1 * k))
+ {
+ refnames.insert(mi->name[r->rid]);
+ refnameToScore[mi->name[r->rid]] += r->cnt;
+ uint32_t coord = r->rev ? std::max(r->rs, r->re) : std::min(r->rs, r->re);
+ positions1[mi->name[r->rid]].push_back(std::make_pair(coord, r->rev));
+ }
}
for (j =0; j < n_reg2; ++j)
{
const mm_reg1_t *r = &reg2[j];
- refnames.insert(mi->name[r->rid]);
- refnameToScore[mi->name[r->rid]] += r->cnt;
- uint32_t coord = r->rev ? std::max(r->rs, r->re) : std::min(r->rs, r->re);
- positions2[mi->name[r->rid]].push_back(std::make_pair(coord, r->rev));
+ if (readMappingOk(r, mi, ks2, (int) 1.1 * k))
+ {
+ refnames.insert(mi->name[r->rid]);
+ refnameToScore[mi->name[r->rid]] += r->cnt;
+ uint32_t coord = r->rev ? std::max(r->rs, r->re) : std::min(r->rs, r->re);
+ positions2[mi->name[r->rid]].push_back(std::make_pair(coord, r->rev));
+ }
}
bool foundProperPair = false;
@@ -349,3 +356,31 @@ void writeProperPairsFile(std::string outfile, uint32_t properPairs)
ofs << properPairs << '\n';
ofs.close();
}
+
+
+bool readMappingOk(const mm_reg1_t* r, const mm_idx_t* mi, const kseq_t *ks, uint32_t endTolerance)
+{
+ // coords are same style as python (0-based, end is one past the end)
+ assert (r->qs < r->qe && r->rs < r->re);
+
+ if (r->qe - r->qs < std::min((unsigned) 50, (int) 0.5 * ks->seq.l))
+ {
+ return false;
+ }
+
+ uint32_t refLength = mi->len[r->rid];
+ bool startOk;
+ bool endOk;
+ if (r->rev)
+ {
+ startOk = (r->qs < endTolerance || refLength - r->re < endTolerance);
+ endOk = (ks->seq.l - r->qe < endTolerance || r->rs < endTolerance);
+ }
+ else
+ {
+ startOk = (r->qs < endTolerance || r->rs < endTolerance);
+ endOk = (ks->seq.l - r->qe < endTolerance || refLength - r->re < endTolerance);
+ }
+
+ return (startOk && endOk);
+}
diff --git a/ariba/external_progs.py b/ariba/external_progs.py
index 1fe3bc6..622c0e6 100644
--- a/ariba/external_progs.py
+++ b/ariba/external_progs.py
@@ -12,7 +12,7 @@ class Error (Exception): pass
prog_to_default = {
'bowtie2': 'bowtie2',
'cdhit': 'cd-hit-est',
- 'cdhit2d': 'cd-hit-est-2d',
+ #'cdhit2d': 'cd-hit-est-2d',
#'gapfiller': 'GapFiller.pl',
'mash': 'mash',
'nucmer' : 'nucmer',
@@ -27,7 +27,7 @@ prog_to_env_var = {x: 'ARIBA_' + x.upper() for x in prog_to_default if x not in
prog_to_version_cmd = {
'bowtie2': ('--version', re.compile('.*bowtie2.*version (.*)$')),
'cdhit': ('', re.compile('CD-HIT version ([0-9\.]+) \(')),
- 'cdhit2d': ('', re.compile('CD-HIT version ([0-9\.]+) \(')),
+ #'cdhit2d': ('', re.compile('CD-HIT version ([0-9\.]+) \(')),
#'gapfiller': ('', re.compile('^Usage: .*pl \[GapFiller_(.*)\]')),
'mash': ('', re.compile('^Mash version (.*)$')),
'nucmer': ('--version', re.compile('^NUCmer \(NUCleotide MUMmer\) version ([0-9\.]+)')),
@@ -39,7 +39,7 @@ prog_to_version_cmd = {
min_versions = {
'bowtie2': '2.1.0',
'cdhit': '4.6',
- 'cdhit2d': '4.6',
+ #'cdhit2d': '4.6',
'mash': '1.0.2',
'nucmer': '3.1',
#'spades': '3.5.0',
diff --git a/ariba/mash.py b/ariba/mash.py
index 97a9496..b4c3ffb 100644
--- a/ariba/mash.py
+++ b/ariba/mash.py
@@ -19,9 +19,13 @@ class Masher:
self.extern_progs = extern_progs
- def _sketch(self, infile, individual):
+ @classmethod
+ def sketch(cls, infile, individual, extern_progs, verbose=True, verbose_filehandle=None):
+ if verbose:
+ assert verbose_filehandle is not None
+
cmd_list = [
- self.extern_progs.exe('mash'),
+ extern_progs.exe('mash'),
'sketch',
'-s 100000'
]
@@ -30,7 +34,7 @@ class Masher:
cmd_list.append('-i')
cmd_list.append(infile)
- common.syscall(' '.join(cmd_list), verbose=True, verbose_filehandle=self.log_fh)
+ common.syscall(' '.join(cmd_list), verbose=verbose, verbose_filehandle=verbose_filehandle)
def _dist(self, outfile):
@@ -45,8 +49,9 @@ class Masher:
def run(self, outfile):
- self._sketch(self.reference_fa, True)
- self._sketch(self.query_fa, False)
+ if not os.path.exists(self.reference_fa + '.msh'):
+ Masher.sketch(self.reference_fa, True, self.extern_progs, verbose=True, verbose_filehandle=self.log_fh)
+ Masher.sketch(self.query_fa, False, self.extern_progs, verbose=True, verbose_filehandle=self.log_fh)
self._dist(outfile)
if os.path.getsize(outfile) == 0:
return None
diff --git a/ariba/ref_genes_getter.py b/ariba/ref_genes_getter.py
index e59efbb..34b4d8c 100644
--- a/ariba/ref_genes_getter.py
+++ b/ariba/ref_genes_getter.py
@@ -25,10 +25,11 @@ argannot_ref = '"ARG-ANNOT, a new bioinformatic tool to discover antibiotic resi
class RefGenesGetter:
- def __init__(self, ref_db, version=None):
+ def __init__(self, ref_db, version=None, debug=False):
if ref_db not in allowed_ref_dbs:
raise Error('Error in RefGenesGetter. ref_db must be one of: ' + str(allowed_ref_dbs) + ', but I got "' + ref_db)
self.ref_db=ref_db
+ self.debug = debug
self.genetic_code = 11
self.max_download_attempts = 3
self.sleep_time = 2
@@ -185,6 +186,9 @@ class RefGenesGetter:
pyfastaq.utils.close(f_out_tsv)
pyfastaq.utils.close(f_out_log)
os.chdir(current_dir)
+ if not self.debug:
+ shutil.rmtree(tmpdir)
+
print('Extracted data and written ARIBA input files\n')
print('Finished. Final files are:', final_fasta, final_tsv, sep='\n\t', end='\n\n')
print('You can use them with ARIBA like this:')
@@ -216,22 +220,36 @@ class RefGenesGetter:
print('Combining downloaded fasta files...')
fout_fa = pyfastaq.utils.open_file_write(final_fasta)
fout_tsv = pyfastaq.utils.open_file_write(final_tsv)
+ used_names = {}
- for filename in os.listdir('database'):
+ for filename in os.listdir():
if filename.endswith('.fsa'):
print(' ', filename)
- prefix = filename.split('.')[0]
- file_reader = pyfastaq.sequences.file_reader(os.path.join('database', filename))
+ file_reader = pyfastaq.sequences.file_reader(filename)
for seq in file_reader:
- seq.id = prefix + '.' + seq.id
+ try:
+ prefix, suffix = seq.id.split('_', maxsplit=1)
+ description = 'Original name: ' + seq.id
+ seq.id = prefix + '.' + suffix
+ except:
+ description = '.'
+
+ # names are not unique across the files
+ if seq.id in used_names:
+ used_names[seq.id] += 1
+ seq.id += '_' + str(used_names[seq.id])
+ else:
+ used_names[seq.id] = 1
+
print(seq, file=fout_fa)
- print(seq.id, '1', '0', '.', '.', '.', sep='\t', file=fout_tsv)
+ print(seq.id, '1', '0', '.', '.', description, sep='\t', file=fout_tsv)
pyfastaq.utils.close(fout_fa)
pyfastaq.utils.close(fout_tsv)
print('\nFinished combining files\n')
os.chdir(current_dir)
- shutil.rmtree(tmpdir)
+ if not self.debug:
+ shutil.rmtree(tmpdir)
print('Finished. Final files are:', final_fasta, final_tsv, sep='\n\t', end='\n\n')
print('You can use them with ARIBA like this:')
print('ariba prepareref -f', final_fasta, '-m', final_tsv, 'output_directory\n')
@@ -273,7 +291,8 @@ class RefGenesGetter:
pyfastaq.utils.close(f_out_tsv)
pyfastaq.utils.close(f_out_fa)
- shutil.rmtree(tmpdir)
+ if not self.debug:
+ shutil.rmtree(tmpdir)
print('Finished. Final files are:', final_fasta, final_tsv, sep='\n\t', end='\n\n')
print('You can use them with ARIBA like this:')
@@ -326,7 +345,8 @@ class RefGenesGetter:
pyfastaq.utils.close(fout_tsv)
print('\nFinished combining files\n')
os.chdir(current_dir)
- shutil.rmtree(tmpdir)
+ if not self.debug:
+ shutil.rmtree(tmpdir)
print('Finished. Final files are:', final_fasta, final_tsv, sep='\n\t', end='\n\n')
print('You can use them with ARIBA like this:')
print('ariba prepareref -f', final_fasta, '-m', final_tsv, 'output_directory\n')
@@ -358,6 +378,8 @@ class RefGenesGetter:
pyfastaq.utils.close(f_out_fa)
pyfastaq.utils.close(f_out_meta)
+ if not self.debug:
+ os.unlink(srst2_fa)
print('Finished downloading and converting data. Final files are:', final_fasta, final_tsv, sep='\n\t', end='\n\n')
print('You can use them with ARIBA like this:')
@@ -390,7 +412,8 @@ class RefGenesGetter:
print('Extracting files ... ', end='', flush=True)
vparser = vfdb_parser.VfdbParser(zipfile, outprefix)
vparser.run()
- shutil.rmtree(tmpdir)
+ if not self.debug:
+ shutil.rmtree(tmpdir)
print('done')
final_fasta = outprefix + '.fa'
final_tsv = outprefix + '.tsv'
diff --git a/ariba/ref_preparer.py b/ariba/ref_preparer.py
index c2cb310..ecacd65 100644
--- a/ariba/ref_preparer.py
+++ b/ariba/ref_preparer.py
@@ -1,8 +1,9 @@
import sys
import os
+import shutil
import pickle
import pyfastaq
-from ariba import reference_data
+from ariba import reference_data, mash
class Error (Exception): pass
@@ -23,6 +24,7 @@ class RefPreparer:
clusters_file=None,
threads=1,
verbose=False,
+ force=False,
):
self.extern_progs = extern_progs
@@ -43,6 +45,7 @@ class RefPreparer:
self.clusters_file = clusters_file
self.threads = threads
self.verbose = verbose
+ self.force = force
@classmethod
@@ -136,6 +139,9 @@ class RefPreparer:
def run(self, outdir):
original_dir = os.getcwd()
+ if self.force and os.path.exists(outdir):
+ shutil.rmtree(outdir)
+
if os.path.exists(outdir):
raise Error('Error! Output directory ' + outdir + ' already exists. Cannot continue')
@@ -204,3 +210,8 @@ class RefPreparer:
with open(clusters_pickle_file, 'wb') as f:
pickle.dump(clusters, f)
+ if self.verbose:
+ print('\nMash-sketching all reference sequences', flush=True)
+
+ mash.Masher.sketch(os.path.join(outdir, '02.cdhit.all.fa'), True, self.extern_progs, self.verbose, sys.stdout)
+
diff --git a/ariba/reference_data.py b/ariba/reference_data.py
index 2ddbac0..69c5755 100644
--- a/ariba/reference_data.py
+++ b/ariba/reference_data.py
@@ -15,6 +15,7 @@ class ReferenceData:
def __init__(self,
fasta_files,
metadata_tsv_files,
+ rename_file=None,
min_gene_length=6,
max_gene_length=10000,
genetic_code=11,
@@ -32,6 +33,22 @@ class ReferenceData:
pyfastaq.sequences.genetic_code = self.genetic_code
self.rename_dict = None
+ if rename_file is None or not os.path.exists(rename_file):
+ self.ariba_to_original_name = {}
+ else:
+ self.ariba_to_original_name = ReferenceData._load_rename_file(rename_file)
+
+
+ @classmethod
+ def _load_rename_file(cls, filename):
+ ariba_name_to_original_name = {}
+ f = pyfastaq.utils.open_file_read(filename)
+ for line in f:
+ original_name, ariba_name = line.rstrip().split('\t')
+ ariba_name_to_original_name[ariba_name] = original_name
+ pyfastaq.utils.close(f)
+ return ariba_name_to_original_name
+
@classmethod
def _load_metadata_tsv(cls, filename, metadata_dict):
@@ -356,7 +373,6 @@ class ReferenceData:
def rename_sequences(self, outfile):
self.rename_dict = ReferenceData._seq_names_to_rename_dict(self.sequences.keys())
if len(self.rename_dict):
- print('Had to rename some sequences. See', outfile, 'for old -> new names', file=sys.stderr)
with open(outfile, 'w') as f:
for old_name, new_name in sorted(self.rename_dict.items()):
print(old_name, new_name, sep='\t', file=f)
diff --git a/ariba/report.py b/ariba/report.py
index 324124a..bbe215c 100644
--- a/ariba/report.py
+++ b/ariba/report.py
@@ -1,40 +1,43 @@
import copy
+import re
import sys
import pymummer
+from ariba import sequence_variant
class Error (Exception): pass
columns = [
- 'ref_name', # 0 name of reference sequence
- 'gene', # 1 is a gene 0|1
- 'var_only', # 2 is variant only 0|1
- 'flag', # 3 cluster flag
- 'reads', # 4 number of reads in this cluster
- 'cluster', # 5 name of cluster
- 'ref_len', # 6 length of reference sequence
- 'ref_base_assembled', # 7 number of reference nucleotides assembled by this contig
- 'pc_ident', # 8 %identity between ref sequence and contig
- 'ctg', # 9 name of contig matching reference
- 'ctg_len', # 10 length of contig matching reference
- 'ctg_cov', # 11 mean mapped read depth of this contig
- 'known_var', # 12 is this a known SNP from reference metadata? 1|0
- 'var_type', # 13 The type of variant. Currently only SNP supported
- 'var_seq_type', # 14 if known_var=1, n|p for nucleotide or protein
- 'known_var_change', # 15 if known_var=1, the wild/variant change, eg I42L
- 'has_known_var', # 16 if known_var=1, 1|0 for whether or not the assembly has the variant
- 'ref_ctg_change', # 17 amino acid or nucleotide change between reference and contig, eg I42L
- 'ref_ctg_effect', # 18 effect of change between reference and contig, eg SYS, NONSYN (amino acid changes only)
- 'ref_start', # 19 start position of variant in contig
- 'ref_end', # 20 end position of variant in contig
- 'ref_nt', # 21 nucleotide(s) in contig at variant position
- 'ctg_start', # 22 start position of variant in contig
- 'ctg_end', # 23 end position of variant in contig
- 'ctg_nt', # 24 nucleotide(s) in contig at variant position
- 'smtls_total_depth', # 25 total read depth at variant start position in contig, reported by mpileup
- 'smtls_alt_nt', # 26 alt nucleotides on contig, reported by mpileup
- 'smtls_alt_depth', # 27 alt depth on contig, reported by mpileup
- 'var_description', # 28 description of variant from reference metdata
- 'free_text', # 29 other free text about reference sequence, from reference metadata
+ 'ariba_ref_name', # 0 ariba (renamed) name of reference sequence
+ 'ref_name', # 1 original name of ref sequence
+ 'gene', # 2 is a gene 0|1
+ 'var_only', # 3 is variant only 0|1
+ 'flag', # 4 cluster flag
+ 'reads', # 5 number of reads in this cluster
+ 'cluster', # 6 name of cluster
+ 'ref_len', # 7 length of reference sequence
+ 'ref_base_assembled', # 8 number of reference nucleotides assembled by this contig
+ 'pc_ident', # 9 %identity between ref sequence and contig
+ 'ctg', # 10 name of contig matching reference
+ 'ctg_len', # 11 length of contig matching reference
+ 'ctg_cov', # 12 mean mapped read depth of this contig
+ 'known_var', # 13 is this a known SNP from reference metadata? 1|0
+ 'var_type', # 14 The type of variant. Currently only SNP supported
+ 'var_seq_type', # 15 if known_var=1, n|p for nucleotide or protein
+ 'known_var_change', # 16 if known_var=1, the wild/variant change, eg I42L
+ 'has_known_var', # 17 if known_var=1, 1|0 for whether or not the assembly has the variant
+ 'ref_ctg_change', # 18 amino acid or nucleotide change between reference and contig, eg I42L
+ 'ref_ctg_effect', # 19 effect of change between reference and contig, eg SYS, NONSYN (amino acid changes only)
+ 'ref_start', # 20 start position of variant in contig
+ 'ref_end', # 21 end position of variant in contig
+ 'ref_nt', # 22 nucleotide(s) in contig at variant position
+ 'ctg_start', # 23 start position of variant in contig
+ 'ctg_end', # 24 end position of variant in contig
+ 'ctg_nt', # 25 nucleotide(s) in contig at variant position
+ 'smtls_total_depth', # 26 total read depth at variant start position in contig, reported by mpileup
+ 'smtls_nts', # 27 alt nucleotides on contig, reported by mpileup
+ 'smtls_nts_depth', # 28 alt depth on contig, reported by mpileup
+ 'var_description', # 29 description of variant from reference metdata
+ 'free_text', # 30 other free text about reference sequence, from reference metadata
]
@@ -53,8 +56,8 @@ var_columns = [
'ctg_end',
'ctg_nt',
'smtls_total_depth',
- 'smtls_alt_nt',
- 'smtls_alt_depth',
+ 'smtls_nts',
+ 'smtls_nts_depth',
'var_description',
]
@@ -91,11 +94,12 @@ def _samtools_depths_at_known_snps_all_wild(sequence_meta, contig_name, cluster,
if ref_nuc_range is None:
return None
+ bases = []
ctg_nts = []
ref_nts = []
smtls_total_depths = []
- smtls_alt_nts = []
- smtls_alt_depths = []
+ smtls_nts = []
+ smtls_depths = []
contig_positions = []
for ref_position in range(ref_nuc_range[0], ref_nuc_range[1]+1, 1):
@@ -106,17 +110,19 @@ def _samtools_depths_at_known_snps_all_wild(sequence_meta, contig_name, cluster,
ref_nts.append(cluster.ref_sequence[ref_position])
contig_position, in_indel = nucmer_match.qry_coords_from_ref_coord(ref_position, variant_list)
contig_positions.append(contig_position)
- ref, alt, total_depth, alt_depths = cluster.samtools_vars.get_depths_at_position(contig_name, contig_position)
- ctg_nts.append(ref)
- smtls_alt_nts.append(alt)
+ bases, total_depth, base_depths = cluster.samtools_vars.get_depths_at_position(contig_name, contig_position)
+ #ctg_nts.append(ref)
+ #samtools_nts.append(bases)
+ ctg_nts.append(cluster.assembly.sequences[contig_name][contig_position])
+ smtls_nts.append(bases)
smtls_total_depths.append(total_depth)
- smtls_alt_depths.append(alt_depths)
+ smtls_depths.append(base_depths)
ctg_nts = ';'.join(ctg_nts) if len(ctg_nts) else '.'
ref_nts = ';'.join(ref_nts) if len(ref_nts) else '.'
- smtls_alt_nts = ';'.join(smtls_alt_nts) if len(smtls_alt_nts) else '.'
+ smtls_nts = ';'.join(smtls_nts) if len(smtls_nts) else '.'
smtls_total_depths = ';'.join([str(x)for x in smtls_total_depths]) if len(smtls_total_depths) else '.'
- smtls_alt_depths = ';'.join([str(x)for x in smtls_alt_depths]) if len(smtls_alt_depths) else '.'
+ smtls_depths = ';'.join([str(x)for x in smtls_depths]) if len(smtls_depths) else '.'
ctg_start = str(min(contig_positions) + 1) if contig_positions is not None else '.'
ctg_end = str(max(contig_positions) + 1) if contig_positions is not None else '.'
@@ -128,8 +134,8 @@ def _samtools_depths_at_known_snps_all_wild(sequence_meta, contig_name, cluster,
ctg_end,
ctg_nts,
smtls_total_depths,
- smtls_alt_nts,
- smtls_alt_depths
+ smtls_nts,
+ smtls_depths
]]
@@ -149,6 +155,7 @@ def _report_lines_for_one_contig(cluster, contig_name, ref_cov_per_contig, pymum
common_first_columns = [
cluster.ref_sequence.id,
+ cluster.refdata.ariba_to_original_name.get(cluster.ref_sequence.id, cluster.ref_sequence.id),
cluster.is_gene,
cluster.is_variant_only,
str(cluster.status_flag),
@@ -193,33 +200,86 @@ def _report_lines_for_one_contig(cluster, contig_name, ref_cov_per_contig, pymum
if contributing_vars is None:
samtools_columns = [['.'] * 9]
else:
- contributing_vars.sort(key = lambda x: x.qry_start)
+ if var_effect in ['INDELS', 'MULTIPLE']:
+ ref_start_pos = min([x.ref_start for x in contributing_vars])
+ ref_end_pos = max([x.ref_start for x in contributing_vars])
+ ctg_start_pos = min([x.qry_start for x in contributing_vars])
+ ctg_end_pos = max([x.qry_start for x in contributing_vars])
+ else:
+ ref_start_pos = 3 * position if cluster.is_gene == '1' else position
+ assert contig_name in cluster.assembly_compare.nucmer_hits
+ ref_start_hit = None
+ for hit in cluster.assembly_compare.nucmer_hits[contig_name]:
+ if hit.ref_name == cluster.ref_sequence.id and hit.ref_coords().distance_to_point(ref_start_pos) == 0:
+ ref_start_hit = copy.copy(hit)
+ break
+
+ assert ref_start_hit is not None
+ ctg_start_pos, ctg_start_in_indel = ref_start_hit.qry_coords_from_ref_coord(ref_start_pos, pymummer_variants)
+
+ if known_var_change not in ['.', 'unknown']:
+ regex = re.match('^([^0-9]+)([0-9]+)([^0-9]+)$', known_var_change)
+ try:
+ ref_var_string, ref_var_position, ctg_var_string = regex.group(1, 2, 3)
+ except:
+ raise Error('Error parsing variant ' + known_var_change)
+ elif ref_ctg_change != '.':
+ if '_' in ref_ctg_change:
+ regex = re.match('^([^0-9]+)([0-9]+)_[^0-9]+[0-9]+([^0-9]+)$', ref_ctg_change)
+ try:
+ ref_var_string, ref_var_position, ctg_var_string = regex.group(1, 2, 3)
+ except:
+ raise Error('Error parsing variant ' + ref_ctg_change)
+ else:
+ regex = re.match('^([^0-9]+)([0-9]+)([^0-9]+)$', ref_ctg_change)
+ try:
+ ref_var_string, ref_var_position, ctg_var_string = regex.group(1, 2, 3)
+ except:
+ raise Error('Error parsing variant ' + ref_ctg_change)
+ else:
+ assert var_effect == 'SYN'
+
+ if var_effect == 'SYN':
+ ref_end_pos = ref_start_pos + 2
+ ctg_end_pos = ctg_start_pos + 2
+ elif ref_var_string == '.' or var_effect in {'INS', 'DEL', 'FSHIFT', 'TRUNC', 'INDELS', 'UNKNOWN'}:
+ ref_end_pos = ref_start_pos
+ ctg_end_pos = ctg_start_pos
+ elif cluster.is_gene == '1':
+ ref_end_pos = ref_start_pos + 3 * len(ref_var_string) - 1
+ ctg_end_pos = ctg_start_pos + 3 * len(ctg_var_string) - 1
+ else:
+ ref_end_pos = ref_start_pos + len(ref_var_string) - 1
+ ctg_end_pos = ctg_start_pos + len(ctg_var_string) - 1
smtls_total_depth = []
smtls_alt_nt = []
smtls_alt_depth = []
- for var in contributing_vars:
+ for qry_pos in range(ctg_start_pos, ctg_end_pos + 1, 1):
if contig_name in remaining_samtools_variants:
- remaining_samtools_variants[contig_name].discard(var.qry_start)
+ try:
+ remaining_samtools_variants[contig_name].discard(qry_pos)
+ except:
+ pass
- depths_tuple = cluster.samtools_vars.get_depths_at_position(contig_name, var.qry_start)
+ depths_tuple = cluster.samtools_vars.get_depths_at_position(contig_name, qry_pos)
if depths_tuple is not None:
- smtls_alt_nt.append(depths_tuple[1])
- smtls_total_depth.append(str(depths_tuple[2]))
- smtls_alt_depth.append(str(depths_tuple[3]))
+ smtls_alt_nt.append(depths_tuple[0])
+ smtls_total_depth.append(str(depths_tuple[1]))
+ smtls_alt_depth.append(str(depths_tuple[2]))
smtls_total_depth = ';'.join(smtls_total_depth) if len(smtls_total_depth) else '.'
smtls_alt_nt = ';'.join(smtls_alt_nt) if len(smtls_alt_nt) else '.'
smtls_alt_depth = ';'.join(smtls_alt_depth) if len(smtls_alt_depth) else '.'
samtools_columns = [
- str(contributing_vars[0].ref_start + 1), #ref_start
- str(contributing_vars[0].ref_end + 1), # ref_end
- ';'.join([x.ref_base for x in contributing_vars]), # ref_nt
- str(contributing_vars[0].qry_start + 1), # ctg_start
- str(contributing_vars[0].qry_end + 1), #ctg_end
- ';'.join([x.qry_base for x in contributing_vars]), #ctg_nt
+ str(ref_start_pos + 1), #ref_start
+ str(ref_end_pos + 1), # ref_end
+ cluster.ref_sequence[ref_start_pos:ref_end_pos+1],
+ str(ctg_start_pos + 1), # ctg_start
+ str(ctg_end_pos + 1), #ctg_end
+ cluster.assembly.sequences[contig_name][ctg_start_pos:ctg_end_pos + 1], # ctg_nt
smtls_total_depth,
smtls_alt_nt,
smtls_alt_depth,
@@ -230,6 +290,8 @@ def _report_lines_for_one_contig(cluster, contig_name, ref_cov_per_contig, pymum
for matching_var in matching_vars_set:
if contributing_vars is None:
samtools_columns = _samtools_depths_at_known_snps_all_wild(matching_var, contig_name, cluster, pymummer_variants)
+ samtools_columns[2] = samtools_columns[2].replace(';', '')
+ samtools_columns[5] = samtools_columns[5].replace(';', '')
reported_known_vars.add(str(matching_var.variant))
variant_columns[3] = str(matching_var.variant)
@@ -271,7 +333,9 @@ def _report_lines_for_one_contig(cluster, contig_name, ref_cov_per_contig, pymum
var_string = None
else:
ref_nt = cluster.ref_sequence[ref_coord]
- var_string = depths_tuple[0] + str(ref_coord + 1) + depths_tuple[1]
+ ctg_nt = cluster.assembly.sequences[contig_name][var_position]
+ alt_strings = [x for x in depths_tuple[0].split(',') if x != ctg_nt]
+ var_string = ctg_nt + str(ref_coord + 1) + ','.join(alt_strings)
ref_coord = str(ref_coord + 1)
if var_string not in reported_known_vars:
@@ -280,10 +344,10 @@ def _report_lines_for_one_contig(cluster, contig_name, ref_cov_per_contig, pymum
'HET', # var_type
'.', '.', '.', var_string, '.', ref_coord, ref_coord, ref_nt, # var_seq_type ... ref_nt
str(var_position + 1), str(var_position + 1), # ctg_start, ctg_end
- depths_tuple[0], # ctg_nt
- str(depths_tuple[2]), # smtls_total_depth
- depths_tuple[1], # smtls_alt_nt
- str(depths_tuple[3]), # smtls_alt_depth
+ ctg_nt, # ctg_nt
+ str(depths_tuple[1]), # smtls_total_depth
+ depths_tuple[0], # smtls_alt_nt
+ str(depths_tuple[2]), # smtls_alt_depth
'.',
free_text_column,
]
@@ -297,11 +361,11 @@ def _report_lines_for_one_contig(cluster, contig_name, ref_cov_per_contig, pymum
def report_lines(cluster):
if cluster.status_flag.has('ref_seq_choose_fail'):
- fields = ['.', '.', '.', str(cluster.status_flag), str(cluster.total_reads), cluster.name] + ['.'] * (len(columns) - 6)
+ fields = ['.', '.', '.', '.', str(cluster.status_flag), str(cluster.total_reads), cluster.name] + ['.'] * (len(columns) - 7)
assert len(fields) == len(columns)
return ['\t'.join(fields)]
elif cluster.status_flag.has('assembly_fail'):
- fields = ['.', '.', '.', str(cluster.status_flag), str(cluster.total_reads), cluster.name] + ['.'] * (len(columns) - 6)
+ fields = ['.', '.', '.', '.', str(cluster.status_flag), str(cluster.total_reads), cluster.name] + ['.'] * (len(columns) - 7)
assert len(fields) == len(columns)
return ['\t'.join(fields)]
diff --git a/ariba/samtools_variants.py b/ariba/samtools_variants.py
index a7c58be..84c97fb 100644
--- a/ariba/samtools_variants.py
+++ b/ariba/samtools_variants.py
@@ -76,7 +76,8 @@ class SamtoolsVariants:
if len(rows) == 1:
r, p, ref_base, alt_base, ref_counts, alt_counts = rows[0].rstrip().split()
- return ref_base, alt_base, int(ref_counts), alt_counts
+ bases = ref_base if alt_base == '.' else ref_base + ',' + alt_base
+ return bases, int(ref_counts), alt_counts
else:
return None
@@ -161,7 +162,7 @@ class SamtoolsVariants:
if seq_name in d and position in d[seq_name]:
return d[seq_name][position]
else:
- return 'ND', 'ND', 'ND', 'ND'
+ return 'ND', 'ND', 'ND'
def run(self):
diff --git a/ariba/summary.py b/ariba/summary.py
index 3d2b349..fc0a2b0 100644
--- a/ariba/summary.py
+++ b/ariba/summary.py
@@ -126,13 +126,15 @@ class Summary:
if variant.var_group not in seen_groups:
seen_groups[variant.var_group] = {'yes': 0, 'het': 0}
- if variant.het_percent is None:
- seen_groups[variant.var_group]['yes'] += 1
- this_cluster_dict['groups'][variant.var_group] = 'yes'
- else:
+ if variant.het_percent is not None:
+ this_cluster_dict['groups'][variant.var_group + '.%'] = variant.het_percent
+
+ if variant.is_het:
seen_groups[variant.var_group]['het'] += 1
this_cluster_dict['groups'][variant.var_group] = 'het'
- this_cluster_dict['groups'][variant.var_group + '.%'] = variant.het_percent
+ else:
+ seen_groups[variant.var_group]['yes'] += 1
+ this_cluster_dict['groups'][variant.var_group] = 'yes'
for group, d in seen_groups.items():
if d['het'] > 0 and d['het'] + d['yes'] > 1:
@@ -254,6 +256,7 @@ class Summary:
'het': '#fdbf6f',
'fragmented': '#1f78b4',
'interrupted': '#a6cee3',
+ 'partial': '#fdbf6f',
}
cols_to_add_colour_col.reverse()
@@ -272,7 +275,8 @@ class Summary:
@classmethod
def _matrix_to_csv(cls, matrix, header, outfile, remove_nas=False):
f = pyfastaq.utils.open_file_write(outfile)
- print(*header, sep=',', file=f)
+ fixed_header = [x.replace(',', '/') for x in header]
+ print(*fixed_header, sep=',', file=f)
for line in matrix:
if remove_nas:
new_line = ['' if x=='NA' else x for x in line]
@@ -284,6 +288,10 @@ class Summary:
@staticmethod
def _distance_score_between_values(value1, value2):
+ if value1 == 'partial':
+ value1 = 'no'
+ if value2 == 'partial':
+ value2 = 'no'
value_set = {value1, value2}
if value_set.isdisjoint(required_keys_for_difference) or value1 == value2 or value_set == {'NA', 'no'}:
return 0
diff --git a/ariba/summary_cluster.py b/ariba/summary_cluster.py
index efc4cf8..f3f952c 100644
--- a/ariba/summary_cluster.py
+++ b/ariba/summary_cluster.py
@@ -96,6 +96,14 @@ class SummaryCluster:
return identity
+ def _has_any_part_of_ref_assembled(self):
+ for d in self.data:
+ if isinstance(d['ref_base_assembled'], int) and d['ref_base_assembled'] > 0:
+ return True
+
+ return False
+
+
def _to_cluster_summary_assembled(self):
if len(self.data) == 0:
return 'no'
@@ -105,10 +113,10 @@ class SummaryCluster:
else:
has_complete_gene = self.flag.has('complete_gene')
- if self.flag.has('assembly_fail') or \
- (not self.flag.has('assembled')) or \
- self.flag.has('ref_seq_choose_fail'):
+ if self.flag.has('assembly_fail') or self.flag.has('ref_seq_choose_fail'):
return 'no'
+ elif not self.flag.has('assembled'):
+ return 'partial' if self._has_any_part_of_ref_assembled() else 'no'
elif self.flag.has('assembled_into_one_contig') and has_complete_gene:
if self.flag.has('unique_contig') and \
(not self.flag.has('scaffold_graph_bad')) and \
@@ -126,42 +134,70 @@ class SummaryCluster:
@classmethod
def _has_known_variant(cls, data_dict):
- return data_dict['has_known_var'] == '1'
+ if data_dict['has_known_var'] == '1':
+ return 'yes'
+ elif data_dict['known_var'] == '0':
+ return 'no'
+ elif data_dict['gene'] == '1': # we don't yet call hets in genes
+ return 'no'
+ else:
+ cluster_var = summary_cluster_variant.SummaryClusterVariant(data_dict)
+ return 'het' if cluster_var.is_het else 'no'
def _has_any_known_variant(self):
- for d in self.data:
- if self._has_known_variant(d):
- return 'yes'
- return 'no'
+ results = {self._has_known_variant(d) for d in self.data}
+ if 'yes' in results:
+ return 'yes'
+ else:
+ return 'het' if 'het' in results else 'no'
@classmethod
def _has_nonsynonymous(cls, data_dict):
- return data_dict['ref_ctg_effect'] != 'SYN' and \
- (
- data_dict['has_known_var'] == '1' or \
- (data_dict['known_var'] != '1' and (data_dict['ref_ctg_change'] != '.' or data_dict['ref_ctg_effect'] != '.'))
- )
+ cluster_var = summary_cluster_variant.SummaryClusterVariant(data_dict)
+
+ has_non_het = data_dict['ref_ctg_effect'] != 'SYN' and \
+ (
+ data_dict['has_known_var'] == '1' or \
+ (data_dict['known_var'] != '1' and (data_dict['ref_ctg_change'] != '.' or data_dict['ref_ctg_effect'] != '.'))
+ )
+
+ if has_non_het and not cluster_var.is_het:
+ return 'yes'
+ else:
+ return 'het' if cluster_var.is_het else 'no'
def _has_any_nonsynonymous(self):
- for d in self.data:
- if self._has_nonsynonymous(d):
- return 'yes'
- return 'no'
+ results = {SummaryCluster._has_nonsynonymous(d) for d in self.data}
+
+ if 'yes' in results:
+ return 'yes'
+ else:
+ return 'het' if 'het' in results else 'no'
@classmethod
def _has_novel_nonsynonymous(cls, data_dict):
- return SummaryCluster._has_nonsynonymous(data_dict) and not SummaryCluster._has_known_variant(data_dict)
+ has_nonsynon = SummaryCluster._has_nonsynonymous(data_dict)
+ if has_nonsynon == 'no':
+ return 'no'
+ else:
+ has_known = SummaryCluster._has_known_variant(data_dict)
+ if has_known == 'no':
+ return has_nonsynon
+ else:
+ return 'no'
def _has_any_novel_nonsynonymous(self):
- for d in self.data:
- if self._has_novel_nonsynonymous(d):
- return 'yes'
- return 'no'
+ results = {SummaryCluster._has_novel_nonsynonymous(d) for d in self.data}
+
+ if 'yes' in results:
+ return 'yes'
+ else:
+ return 'het' if 'het' in results else 'no'
def _to_cluster_summary_has_known_nonsynonymous(self, assembled_summary):
@@ -198,12 +234,12 @@ class SummaryCluster:
return None
if data_dict['known_var'] == '1' and data_dict['ref_ctg_effect'] == 'SNP' \
- and data_dict['smtls_alt_nt'] != '.' and ';' not in data_dict['smtls_alt_nt']:
- nucleotides = [data_dict['ctg_nt']] + data_dict['smtls_alt_nt'].split(',')
- depths = data_dict['smtls_alt_depth'].split(',')
+ and data_dict['smtls_nts'] != '.' and ';' not in data_dict['smtls_nts']:
+ nucleotides = data_dict['smtls_nts'].split(',')
+ depths = data_dict['smtls_nts_depth'].split(',')
if len(nucleotides) != len(depths):
- raise Error('Mismatch in number of inferred nucleotides from ctg_nt, smtls_alt_nt, smtls_alt_depth columns. Cannot continue\n' + str(data_dict))
+ raise Error('Mismatch in number of inferred nucleotides from ctg_nt, smtls_nts, smtls_nts_depth columns. Cannot continue\n' + str(data_dict))
try:
var_nucleotide = data_dict['known_var_change'][-1]
@@ -220,14 +256,13 @@ class SummaryCluster:
return None
-
@staticmethod
def _get_nonsynonymous_var(data_dict):
'''if data_dict has a non synonymous variant, return string:
ref_name.change. Otherwise return None'''
has_nonsyn = SummaryCluster._has_nonsynonymous(data_dict)
- if not has_nonsyn:
+ if has_nonsyn == 'no':
return None
elif data_dict['known_var_change'] == data_dict['ref_ctg_change'] == '.' == data_dict['ref_ctg_effect']:
raise Error('Unexpected data in ariba summary... \n' + str(data_dict) + '\n... known_var_change, ref_ctg_change, ref_ctg_effect all equal to ".", but has a non synonymous change. Something is inconsistent. Cannot continue')
@@ -251,6 +286,7 @@ class SummaryCluster:
return (data_dict['ref_name'], var_change) + var_group
+
def _has_match(self, assembled_summary):
'''assembled_summary should be output of _to_cluster_summary_assembled'''
if assembled_summary.startswith('yes'):
@@ -266,7 +302,7 @@ class SummaryCluster:
'''Returns a set of the variant group ids that this cluster has'''
ids = set()
for d in self.data:
- if self._has_known_variant(d) and d['var_group'] != '.':
+ if self._has_known_variant(d) != 'no' and d['var_group'] != '.':
ids.add(d['var_group'])
return ids
@@ -298,7 +334,10 @@ class SummaryCluster:
for d in self.data:
snp_tuple = self._get_known_noncoding_het_snp(d)
if snp_tuple is not None:
- snp_id = d['var_description'].split(':')[4]
+ try:
+ snp_id = d['var_description'].split(':')[4]
+ except:
+ raise Error('Error getting ID from ' + str(d) + '\n' + d['var_description'])
if snp_id not in snps:
snps[snp_id] = {}
snps[snp_id][snp_tuple[0]] = snp_tuple[1]
@@ -311,7 +350,7 @@ class SummaryCluster:
for data_dict in data_dicts:
cluster_var = summary_cluster_variant.SummaryClusterVariant(data_dict)
- if cluster_var.has_nonsynon:
+ if cluster_var.has_nonsynon or cluster_var.is_het:
variants.add(cluster_var)
return variants
diff --git a/ariba/summary_cluster_variant.py b/ariba/summary_cluster_variant.py
index a0c30a9..62c231a 100644
--- a/ariba/summary_cluster_variant.py
+++ b/ariba/summary_cluster_variant.py
@@ -2,6 +2,11 @@ class Error (Exception): pass
class SummaryClusterVariant:
def __init__(self, data_dict):
+ self.known = None
+ self.var_group = None
+ self.coding = None
+ self.var_string = None
+ self.het_percent = None
self._get_nonsynon_variant_data(data_dict)
@@ -14,10 +19,7 @@ class SummaryClusterVariant:
def __str__(self):
- if self.has_nonsynon:
- return ', '.join((str(self.known), self.var_group, str(self.coding), self.var_string, str(self.het_percent)))
- else:
- return 'None'
+ return ', '.join((str(self.known), self.var_group, str(self.coding), self.var_string, str(self.het_percent)))
@classmethod
@@ -43,22 +45,30 @@ class SummaryClusterVariant:
@classmethod
def _get_is_het_and_percent(cls, data_dict):
- if data_dict['gene'] == '1' or not (data_dict['ref_ctg_effect'] == 'SNP' or data_dict['var_type'] == 'HET') or data_dict['smtls_alt_nt'] == '.' or ';' in data_dict['smtls_alt_nt'] or data_dict['smtls_alt_depth'] == 'ND':
+ if data_dict['gene'] == '1' or not (data_dict['known_var'] == '1' or data_dict['ref_ctg_effect'] == 'SNP' or data_dict['var_type'] == 'HET') or data_dict['smtls_nts'] == '.' or ';' in data_dict['smtls_nts'] or data_dict['smtls_nts_depth'] == 'ND':
return False, None
else:
- nucleotides = [data_dict['ctg_nt']] + data_dict['smtls_alt_nt'].split(',')
- depths = data_dict['smtls_alt_depth'].split(',')
+ nucleotides = data_dict['smtls_nts'].split(',')
+ depths = data_dict['smtls_nts_depth'].split(',')
if len(nucleotides) != len(depths):
- raise Error('Mismatch in number of inferred nucleotides from ctg_nt, smtls_alt_nt, smtls_alt_depth columns. Cannot continue\n' + str(data_dict))
+ raise Error('Mismatch in number of inferred nucleotides from ctg_nt, smtls_nts, smtls_nts_depth columns. Cannot continue\n' + str(data_dict))
try:
- is_het = False
+ depths = [int(x) for x in depths]
+ nuc_to_depth = dict(zip(nucleotides, depths))
if data_dict['ref_ctg_change'] != '.':
var_nucleotide = data_dict['ref_ctg_change'][-1]
elif data_dict['var_type'] == 'HET':
- var_nucleotide = data_dict['smtls_alt_nt']
+ var_nucleotide = '.'
+ best_depth = -1
+ for nuc in nuc_to_depth:
+ if nuc == data_dict['ctg_nt']:
+ continue
+ elif nuc_to_depth[nuc] > best_depth:
+ var_nucleotide = nuc
+ best_depth = nuc_to_depth[nuc]
elif data_dict['known_var_change'] != '.':
var_nucleotide = data_dict['known_var_change'][-1]
else:
@@ -66,15 +76,13 @@ class SummaryClusterVariant:
if var_nucleotide == '.':
return False, None
- depths = [int(x) for x in depths]
- nuc_to_depth = dict(zip(nucleotides, depths))
total_depth = sum(depths)
- var_depth = nuc_to_depth.get(var_nucleotide, 0)
-
- if data_dict['var_type'] == 'HET':
- is_het = True
+ if max([len(x) for x in nucleotides]) == 1:
+ var_depth = nuc_to_depth.get(var_nucleotide, 0)
else:
- is_het = SummaryClusterVariant._depths_look_het(depths)
+ var_depth = sum([nuc_to_depth[x] for x in nuc_to_depth if x[0] == var_nucleotide])
+
+ is_het = SummaryClusterVariant._depths_look_het(depths)
return is_het, round(100 * var_depth / total_depth, 1)
except:
@@ -82,6 +90,19 @@ class SummaryClusterVariant:
def _get_nonsynon_variant_data(self, data_dict):
+ self.known = data_dict['known_var'] == '1'
+ self.coding = data_dict['gene'] == '1'
+ self.var_group = data_dict['var_group']
+
+ if data_dict['known_var'] == '1' and data_dict['known_var_change'] != '.':
+ self.var_string = data_dict['known_var_change']
+ elif data_dict['ref_ctg_change'] != '.':
+ self.var_string = data_dict['ref_ctg_change']
+ else:
+ self.var_string = data_dict['ref_ctg_effect']
+
+ self.is_het, self.het_percent = SummaryClusterVariant._get_is_het_and_percent(data_dict)
+
if not SummaryClusterVariant._has_nonsynonymous(data_dict):
self.has_nonsynon = False
return
@@ -94,16 +115,4 @@ class SummaryClusterVariant:
data_dict['known_var_change'] != data_dict['ref_ctg_change']:
raise Error('Unexpected data in ariba summary... \n' + str(data_dict) + '\n... known_var_change != ref_ctg_change. Cannot continue')
- self.known = data_dict['known_var'] == '1'
- self.var_group = data_dict['var_group']
- self.coding = data_dict['gene'] == '1'
-
- if data_dict['known_var'] == '1' and data_dict['known_var_change'] != '.':
- self.var_string = data_dict['known_var_change']
- elif data_dict['ref_ctg_change'] != '.':
- self.var_string = data_dict['ref_ctg_change']
- else:
- self.var_string = data_dict['ref_ctg_effect']
-
- self.is_het, self.het_percent = SummaryClusterVariant._get_is_het_and_percent(data_dict)
diff --git a/ariba/tasks/getref.py b/ariba/tasks/getref.py
index d5cc618..b744454 100644
--- a/ariba/tasks/getref.py
+++ b/ariba/tasks/getref.py
@@ -5,7 +5,8 @@ from ariba import ref_genes_getter
def run(options):
getter = ref_genes_getter.RefGenesGetter(
options.db,
- version=options.version
+ version=options.version,
+ debug=options.debug
)
getter.run(options.outprefix)
diff --git a/ariba/tasks/prepareref.py b/ariba/tasks/prepareref.py
index ef52684..29c699e 100644
--- a/ariba/tasks/prepareref.py
+++ b/ariba/tasks/prepareref.py
@@ -25,6 +25,7 @@ def run(options):
clusters_file=options.cdhit_clusters,
threads=options.threads,
verbose=options.verbose,
+ force=options.force,
)
preparer.run(options.outdir)
diff --git a/ariba/tasks/run.py b/ariba/tasks/run.py
index 668b69d..db3bd32 100644
--- a/ariba/tasks/run.py
+++ b/ariba/tasks/run.py
@@ -1,5 +1,6 @@
import argparse
import os
+import shutil
import sys
import ariba
@@ -27,6 +28,9 @@ def run(options):
print('Input directory', options.prepareref_dir, 'not found. Cannot continue', file=sys.stderr)
sys.exit(1)
+ if options.force and os.path.exists(options.outdir):
+ shutil.rmtree(options.outdir)
+
if os.path.exists(options.outdir):
print('Output directory already exists. ARIBA makes the output directory. Cannot continue.', file=sys.stderr)
sys.exit(1)
diff --git a/ariba/test_run_data/metadata.tsv b/ariba/test_run_data/metadata.tsv
index 9d24ab1..77a71d5 100644
--- a/ariba/test_run_data/metadata.tsv
+++ b/ariba/test_run_data/metadata.tsv
@@ -15,5 +15,6 @@ noncoding1 0 0 A14T noncoding_group1 ref has wild type, reads have variant so sh
noncoding1 0 0 A40C . ref has variant, reads have wild type so should not report
noncoding2 0 0 . . generic description of noncoding2
noncoding3 0 0 . . generic description of noncoding3
+noncoding4 0 0 . . generic description of noncoding4
noncoding.var_only1 0 1 C6T . sample does not have this SNP
noncoding.var_only2 0 1 T10A . sample does have this SNP
diff --git a/ariba/test_run_data/reads_1.fq b/ariba/test_run_data/reads_1.fq
index 7e34e6a..62ff409 100644
--- a/ariba/test_run_data/reads_1.fq
+++ b/ariba/test_run_data/reads_1.fq
@@ -2086,3 +2086,7 @@ IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
GATCGTACTGAGGTGTCACGAACGACGGACCAGCGTCGTACGTACGTACGTACGTAGGAGACAGCAGCAAACTGACGTAATCTACCTGAT
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding4:1:18:130/1
+CCTTAATTAAATCTGGCTAGCTTAACGCTATATCAGCCCGTTAGTGTACTAGCCGTAGCATAGTCTAATTTGACCGGGTGAATAGCAATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/test_run_data/reads_2.fq b/ariba/test_run_data/reads_2.fq
index f468576..d5a9376 100644
--- a/ariba/test_run_data/reads_2.fq
+++ b/ariba/test_run_data/reads_2.fq
@@ -2086,3 +2086,7 @@ IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
GCGGGTTTTTGGCCCCTATAACAGTACGTACGTCATGCTGCTGCTCGTAGTATATATAATTAATATATAGTATGCAGTCAGTTGTCGCGA
+
IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding4:1:18:130/2
+CTCCGTAACTCGCCGTTCTGTGCCTATCACGAACCGAGCGCGATGATTTTCCTTTTATCCACCAGGTTGGTAATATTCCTTTTAAGAGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/test_run_data/ref_fasta_to_make_reads_from.fa b/ariba/test_run_data/ref_fasta_to_make_reads_from.fa
index 1bf715b..0294174 100644
--- a/ariba/test_run_data/ref_fasta_to_make_reads_from.fa
+++ b/ariba/test_run_data/ref_fasta_to_make_reads_from.fa
@@ -46,6 +46,11 @@ CTAACTTACTACTATGACTGACTGACTGACTGACTGATCGACTGCTGACATCTGATCGAT
CATCCTGTCGACATCATATCTCGATCGATCGATCGACTGACTGACTGACTGACTGAATCT
CACGTACTGACTCATCATCATCATACTCATCATATCATCGATCGATCATCTGATCTGATG
ACGGGACGACCTCCGTTCGTGTCTGGCTGGTGTCAGCTTTGACTTAAGACGCGCCAAGCC
+>noncoding4
+AGAGATGATCTCTAGCTCCTTAATTAAATCTGGCTAGCTTAACGCTATATCAGCCCGTTA
+GTGTACTAGCCGTAGCATAGTCTAATTTGACCGGGTGAATAGCAATGAATTTCACCAGTC
+GCAACCAGCTGCTCTTAAAAGGAATATTACCAACCTGGTGGATAAAAGGAAAATCATCGC
+GCTCGGTTCGTGATAGGCACAGAACGGCGAGTTACGGAGCCCGGATGGTAGATCGGTTAA
>noncoding.var_only1
CTTGGGGTTCCTGTACTAAAATCACTACGAAAGAGCAACCGTCCCGTTTCAGAGCTAAGG
CGTATCGACGTACTGACGTCGTATGCGTCGTCGACGTGTCCAGGCGCGCCGCGCGCCCCC
diff --git a/ariba/test_run_data/ref_seqs.fa b/ariba/test_run_data/ref_seqs.fa
index 8251d0a..9890fc3 100644
--- a/ariba/test_run_data/ref_seqs.fa
+++ b/ariba/test_run_data/ref_seqs.fa
@@ -19,6 +19,11 @@ GCGATACAAATCCCAAGTTTAGCGGACAGTTCACGCCGGGTTCTAAGAATGTATGCGTCC
CTAACTTACTACTATGACTGACTGACTGACTGACTGATCGACTGCTGACATCTGATCGAT
CATCCTGTCGACATCATATCTCGATCGATCGATCGACTGACTGACTGACTGACTGAATCT
CACGTACTGACTCATCATCATCATACTCATCATATCATCGATCGATCATCTGATCTGATG
+>noncoding4
+AGAGATGATCTCTAGCTCCTTAATTAAATCTGGCTAGCTTAACGCTATATCAGCCCGTTA
+GTGTACTAGCCGTAGCATAGTCTAATTTGACCGGGTGAATAGCAATGAATTTCACCAGTC
+GCAACCAGCTGCTCTTAAAAGGAATATTACCAACCTGGTGGATAAAAGGAAAATCATCGC
+GCTCGGTTCGTGATAGGCACAGAACGGCGAGTTACGGAGCCCGGATGGTAGATCGGTTAA
>noncoding.var_only1
CGTATCATCGATCATCGTACGTACGTCGTCGTAGTCAGTCACGAGCAGAGAGAGAGGAGG
CTATGCATGCATCATCTCACTGCGAGTCAGGAGAGAAGCCGTGTAGCGACGAGCGCAAAA
diff --git a/ariba/tests/assembly_test.py b/ariba/tests/assembly_test.py
index 7ac81b8..740234c 100644
--- a/ariba/tests/assembly_test.py
+++ b/ariba/tests/assembly_test.py
@@ -68,7 +68,7 @@ class TestAssembly(unittest.TestCase):
tmp_log = 'tmp.test_assemble_with_fermilite.log'
tmp_log_fh = open(tmp_log, 'w')
print('First line', file=tmp_log_fh)
- a = assembly.Assembly(reads1, reads2, 'not needed', 'not needed', tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', tmp_log_fh)
+ a = assembly.Assembly(reads1, reads2, 'not needed', 'not needed', tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', tmp_log_fh, 'not needed')
a._assemble_with_fermilite()
self.assertTrue(a.assembled_ok)
tmp_log_fh.close()
@@ -87,7 +87,7 @@ class TestAssembly(unittest.TestCase):
tmp_log = 'tmp.test_assemble_with_fermilite_fails.log'
tmp_log_fh = open(tmp_log, 'w')
print('First line', file=tmp_log_fh)
- a = assembly.Assembly(reads1, reads2, 'not needed', 'not needed', tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', tmp_log_fh)
+ a = assembly.Assembly(reads1, reads2, 'not needed', 'not needed', tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', tmp_log_fh, 'not needed')
a._assemble_with_fermilite()
self.assertFalse(a.assembled_ok)
tmp_log_fh.close()
@@ -104,7 +104,7 @@ class TestAssembly(unittest.TestCase):
reads2 = os.path.join(data_dir, 'assembly_test_assemble_with_spades_reads_2.fq')
ref_fasta = os.path.join(data_dir, 'assembly_test_assemble_with_spades_ref.fa')
tmp_dir = 'tmp.test_assemble_with_spades'
- a = assembly.Assembly(reads1, reads2, 'not needed', ref_fasta, tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout)
+ a = assembly.Assembly(reads1, reads2, 'not needed', ref_fasta, tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout, 'not needed')
a._assemble_with_spades(unittest=True)
self.assertTrue(a.assembled_ok)
shutil.rmtree(tmp_dir)
@@ -117,7 +117,7 @@ class TestAssembly(unittest.TestCase):
reads2 = os.path.join(data_dir, 'assembly_test_assemble_with_spades_reads_2.fq')
ref_fasta = os.path.join(data_dir, 'assembly_test_assemble_with_spades_ref.fa')
tmp_dir = 'tmp.test_assemble_with_spades'
- a = assembly.Assembly(reads1, reads2, 'not needed', ref_fasta, tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout)
+ a = assembly.Assembly(reads1, reads2, 'not needed', ref_fasta, tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout, 'not needed')
a._assemble_with_spades(unittest=False)
self.assertFalse(a.assembled_ok)
shutil.rmtree(tmp_dir)
@@ -130,7 +130,7 @@ class TestAssembly(unittest.TestCase):
reads2 = os.path.join(data_dir, 'assembly_test_assemble_with_spades_reads_2.fq')
ref_fasta = os.path.join(data_dir, 'assembly_test_assemble_with_spades_ref.fa')
tmp_dir = 'tmp.test_scaffold_with_sspace'
- a = assembly.Assembly(reads1, reads2, 'not needed', ref_fasta, tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout)
+ a = assembly.Assembly(reads1, reads2, 'not needed', ref_fasta, tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout, 'not needed')
a.assembly_contigs = os.path.join(data_dir, 'assembly_test_scaffold_with_sspace_contigs.fa')
a._scaffold_with_sspace()
self.assertTrue(os.path.exists(a.scaffolder_scaffolds))
@@ -162,7 +162,7 @@ class TestAssembly(unittest.TestCase):
reads1 = os.path.join(data_dir, 'assembly_test_gapfill_with_gapfiller_reads_1.fq')
reads2 = os.path.join(data_dir, 'assembly_test_gapfill_with_gapfiller_reads_2.fq')
tmp_dir = 'tmp.gap_fill_with_gapfiller_no_gaps'
- a = assembly.Assembly(reads1, reads2, 'not needed', 'ref.fa', tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout)
+ a = assembly.Assembly(reads1, reads2, 'not needed', 'ref.fa', tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout, 'not needed')
a.scaffolder_scaffolds = os.path.join(data_dir, 'assembly_test_gapfill_with_gapfiller.scaffolds_no_gaps.fa')
a._gap_fill_with_gapfiller()
self.assertTrue(os.path.exists(a.gapfilled_scaffolds))
@@ -175,7 +175,7 @@ class TestAssembly(unittest.TestCase):
reads1 = os.path.join(data_dir, 'assembly_test_gapfill_with_gapfiller_reads_1.fq')
reads2 = os.path.join(data_dir, 'assembly_test_gapfill_with_gapfiller_reads_2.fq')
tmp_dir = 'tmp.gap_fill_with_gapfiller_with_gaps'
- a = assembly.Assembly(reads1, reads2, 'not needed', 'ref.fa', tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout)
+ a = assembly.Assembly(reads1, reads2, 'not needed', 'ref.fa', tmp_dir, 'not_needed_for_this_test.fa', 'not_needed_for_this_test.bam', sys.stdout, 'not needed')
a.scaffolder_scaffolds = os.path.join(data_dir, 'assembly_test_gapfill_with_gapfiller.scaffolds_with_gaps.fa')
a._gap_fill_with_gapfiller()
self.assertTrue(os.path.exists(a.gapfilled_scaffolds))
diff --git a/ariba/tests/cluster_test.py b/ariba/tests/cluster_test.py
index 10b59ea..05a7109 100644
--- a/ariba/tests/cluster_test.py
+++ b/ariba/tests/cluster_test.py
@@ -105,7 +105,7 @@ class TestCluster(unittest.TestCase):
c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=0, total_reads_bases=0)
c.run()
- expected = '\t'.join(['.', '.', '.', '64', '0', 'cluster_name'] + ['.'] * 24)
+ expected = '\t'.join(['.', '.', '.', '.', '64', '0', 'cluster_name'] + ['.'] * 24)
self.assertEqual([expected], c.report_lines)
self.assertFalse(c.status_flag.has('ref_seq_choose_fail'))
self.assertTrue(c.status_flag.has('assembly_fail'))
@@ -123,7 +123,26 @@ class TestCluster(unittest.TestCase):
c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=2, total_reads_bases=108, spades_other_options='--only-assembler')
c.run()
- expected = '\t'.join(['.', '.', '.', '1024', '2', 'cluster_name'] + ['.'] * 24)
+ expected = '\t'.join(['.', '.', '.', '.', '1024', '2', 'cluster_name'] + ['.'] * 24)
+ self.assertEqual([expected], c.report_lines)
+ self.assertTrue(c.status_flag.has('ref_seq_choose_fail'))
+ self.assertFalse(c.status_flag.has('assembly_fail'))
+ shutil.rmtree(tmpdir)
+
+
+ def test_full_run_ref_not_in_cluster(self):
+ '''test complete run of cluster when nearest ref is outside cluster'''
+ fasta_in = os.path.join(data_dir, 'cluster_test_full_run_ref_not_in_cluster.in.fa')
+ tsv_in = os.path.join(data_dir, 'cluster_test_full_run_ref_not_in_cluster.in.tsv')
+ refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
+ tmpdir = 'tmp.test_full_run_ref_not_in_cluster'
+ ref_for_mash = os.path.join(data_dir, 'cluster_test_full_run_ref_not_in_cluster.mash.fa')
+ shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_ref_not_in_cluster'), tmpdir)
+
+ c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=72, total_reads_bases=3600, refdata_seqs_fasta_for_mash=ref_for_mash)
+ c.run()
+
+ expected = '\t'.join(['.', '.', '.', '.', '1024', '72', 'cluster_name'] + ['.'] * 24)
self.assertEqual([expected], c.report_lines)
self.assertTrue(c.status_flag.has('ref_seq_choose_fail'))
self.assertFalse(c.status_flag.has('assembly_fail'))
@@ -141,7 +160,7 @@ class TestCluster(unittest.TestCase):
c = cluster.Cluster(tmpdir, 'cluster_name', refdata, total_reads=4, total_reads_bases=304)
c.run()
- expected = '\t'.join(['.', '.', '.', '64', '4', 'cluster_name'] + ['.'] * 24)
+ expected = '\t'.join(['.', '.', '.', '.', '64', '4', 'cluster_name'] + ['.'] * 24)
self.assertEqual([expected], c.report_lines)
self.assertFalse(c.status_flag.has('ref_seq_choose_fail'))
self.assertTrue(c.status_flag.has('assembly_fail'))
@@ -159,13 +178,14 @@ class TestCluster(unittest.TestCase):
c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=72, total_reads_bases=3600)
c.run()
+ self.maxDiff=None
expected = [
- 'noncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t14\t14\tA\t74\t74\tT\t19\t.\t19\tnoncoding1:0:0:A14T:.:ref has wild type, reads has variant so should report\tgeneric description of noncoding1',
- 'noncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t0\t.\tn\t.\t0\tG61T\tSNP\t61\t61\tG\t121\t121\tT\t24\t.\t24\t.\tgeneric description of noncoding1',
- 'noncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t0\t.\tn\t.\t0\t.82C\tINS\t82\t82\t.\t143\t143\tC\t23\t.\t23\t.\tgeneric description of noncoding1',
- 'noncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t0\t.\tn\t.\t0\tT108.\tDEL\t108\t108\tT\t168\t168\t.\t17\t.\t17\t.\tgeneric description of noncoding1',
- 'noncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t1\tSNP\tn\tA6G\t1\t.\t.\t6\t6\tG\t66\t66\tG\t19\t.\t19\tnoncoding1:0:0:A6G:.:variant in ref and reads so should report\tgeneric description of noncoding1',
- 'noncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t1\tSNP\tn\tG9T\t0\t.\t.\t9\t9\tG\t69\t69\tG\t19\t.\t19\tnoncoding1:0:0:G9T:.:wild type in ref and reads\tgeneric description of noncoding1'
+ 'noncoding1\tnoncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t14\t14\tA\t74\t74\tT\t19\tT\t19\tnoncoding1:0:0:A14T:.:ref has wild type, reads has variant so should report\tgeneric description of noncoding1',
+ 'noncoding1\tnoncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t0\t.\tn\t.\t0\tG61T\tSNP\t61\t61\tG\t121\t121\tT\t24\tT\t24\t.\tgeneric description of noncoding1',
+ 'noncoding1\tnoncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t0\t.\tn\t.\t0\t.82C\tINS\t82\t82\tA\t143\t143\tC\t23\tC\t23\t.\tgeneric description of noncoding1',
+ 'noncoding1\tnoncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t0\t.\tn\t.\t0\tT108.\tDEL\t108\t108\tT\t168\t168\tC\t17\tC\t17\t.\tgeneric description of noncoding1',
+ 'noncoding1\tnoncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t1\tSNP\tn\tA6G\t1\t.\t.\t6\t6\tG\t66\t66\tG\t19\tG\t19\tnoncoding1:0:0:A6G:.:variant in ref and reads so should report\tgeneric description of noncoding1',
+ 'noncoding1\tnoncoding1\t0\t0\t531\t72\tcluster_name\t120\t120\t95.87\tcluster_name.scaffold.1\t234\t15.4\t1\tSNP\tn\tG9T\t0\t.\t.\t9\t9\tG\t69\t69\tG\t19\tG\t19\tnoncoding1:0:0:G9T:.:wild type in ref and reads\tgeneric description of noncoding1'
]
self.assertEqual(expected, c.report_lines)
@@ -184,12 +204,10 @@ class TestCluster(unittest.TestCase):
c.run()
expected = [
- 'presence_absence1\t1\t0\t539\t64\tcluster_name\t96\t96\t97.92\tcluster_name.scaffold.1\t213\t15.0\t1\tSNP\tp\tA10V\t1\tA10V\tNONSYN\t29\t29\tC\t84\t84\tT\t22\t.\t22\tpresence_absence1:1:0:A10V:.:Ref has wild, reads have variant so report\tGeneric description of presence_absence1',
- 'presence_absence1\t1\t0\t539\t64\tcluster_name\t96\t96\t97.92\tcluster_name.scaffold.1\t213\t15.0\t0\t.\tp\t.\t0\t.\tSYN\t54\t54\tT\t109\t109\tC\t32\t.\t32\t.\tGeneric description of presence_absence1',
-
- 'presence_absence1\t1\t0\t539\t64\tcluster_name\t96\t96\t97.92\tcluster_name.scaffold.1\t213\t15.0\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tC;G;C\t62\t64\tC;G;C\t18;17;17\t.;.;.\t18;17;17\tpresence_absence1:1:0:R3S:.:Ref and assembly have wild type\tGeneric description of presence_absence1',
-
- 'presence_absence1\t1\t0\t539\t64\tcluster_name\t96\t96\t97.92\tcluster_name.scaffold.1\t213\t15.0\t1\tSNP\tp\tI5A\t1\t.\t.\t13\t15\tG;C;G\t68\t70\tG;C;G\t18;20;20\t.;.;.\t18;20;20\tpresence_absence1:1:0:I5A:.:Ref and reads have variant so report\tGeneric description of presence_absence1',
+ 'presence_absence1\tpresence_absence1\t1\t0\t539\t64\tcluster_name\t96\t96\t97.92\tcluster_name.scaffold.1\t213\t15.0\t1\tSNP\tp\tA10V\t1\tA10V\tNONSYN\t28\t30\tGCG\t83\t85\tGTG\t22;22;21\tG;T;G\t22;22;21\tpresence_absence1:1:0:A10V:.:Ref has wild, reads have variant so report\tGeneric description of presence_absence1',
+ 'presence_absence1\tpresence_absence1\t1\t0\t539\t64\tcluster_name\t96\t96\t97.92\tcluster_name.scaffold.1\t213\t15.0\t0\t.\tp\t.\t0\t.\tSYN\t52\t54\tATT\t107\t109\tATC\t31;31;32\tA;T;C\t31;31;32\t.\tGeneric description of presence_absence1',
+ 'presence_absence1\tpresence_absence1\t1\t0\t539\t64\tcluster_name\t96\t96\t97.92\tcluster_name.scaffold.1\t213\t15.0\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tCGC\t62\t64\tCGC\t18;17;17\tC;G;C\t18;17;17\tpresence_absence1:1:0:R3S:.:Ref and assembly have wild type\tGeneric description of presence_absence1',
+ 'presence_absence1\tpresence_absence1\t1\t0\t539\t64\tcluster_name\t96\t96\t97.92\tcluster_name.scaffold.1\t213\t15.0\t1\tSNP\tp\tI5A\t1\t.\t.\t13\t15\tGCG\t68\t70\tGCG\t18;20;20\tG;C;G\t18;20;20\tpresence_absence1:1:0:I5A:.:Ref and reads have variant so report\tGeneric description of presence_absence1',
]
self.assertEqual(expected, c.report_lines)
@@ -207,7 +225,7 @@ class TestCluster(unittest.TestCase):
c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=66, total_reads_bases=3300)
c.run()
expected = [
- 'variants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t215\t15.3\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tC;G;C\t65\t67\tC;G;C\t18;18;19\t.;.;.\t18;18;19\tvariants_only1:1:1:R3S:.:Ref and assembly have wild type, so do not report\tGeneric description of variants_only1'
+ 'variants_only1\tvariants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t215\t15.3\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tCGC\t65\t67\tCGC\t18;18;19\tC;G;C\t18;18;19\tvariants_only1:1:1:R3S:.:Ref and assembly have wild type, so do not report\tGeneric description of variants_only1'
]
self.assertEqual(expected, c.report_lines)
shutil.rmtree(tmpdir)
@@ -224,7 +242,7 @@ class TestCluster(unittest.TestCase):
c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=66, total_reads_bases=3300)
c.run()
expected = [
- 'variants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t215\t15.3\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tC;G;C\t65\t67\tC;G;C\t18;18;19\t.;.;.\t18;18;19\tvariants_only1:1:1:R3S:.:Ref and assembly have wild type, but always report anyway\tGeneric description of variants_only1'
+ 'variants_only1\tvariants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t215\t15.3\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tCGC\t65\t67\tCGC\t18;18;19\tC;G;C\t18;18;19\tvariants_only1:1:1:R3S:.:Ref and assembly have wild type, but always report anyway\tGeneric description of variants_only1'
]
self.assertEqual(expected, c.report_lines)
shutil.rmtree(tmpdir)
@@ -242,8 +260,8 @@ class TestCluster(unittest.TestCase):
c.run()
expected = [
- 'variants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t215\t15.3\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tC;G;C\t65\t67\tC;G;C\t18;18;19\t.;.;.\t18;18;19\tvariants_only1:1:1:R3S:.:Ref and assembly have wild type\tGeneric description of variants_only1',
- 'variants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t215\t15.3\t1\tSNP\tp\tI5A\t1\t.\t.\t13\t15\tG;C;G\t71\t73\tG;C;G\t17;17;17\t.;.;.\t17;17;17\tvariants_only1:1:1:I5A:.:Ref and reads have variant so report\tGeneric description of variants_only1',
+ 'variants_only1\tvariants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t215\t15.3\t1\tSNP\tp\tR3S\t0\t.\t.\t7\t9\tCGC\t65\t67\tCGC\t18;18;19\tC;G;C\t18;18;19\tvariants_only1:1:1:R3S:.:Ref and assembly have wild type\tGeneric description of variants_only1',
+ 'variants_only1\tvariants_only1\t1\t1\t27\t66\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t215\t15.3\t1\tSNP\tp\tI5A\t1\t.\t.\t13\t15\tGCG\t71\t73\tGCG\t17;17;17\tG;C;G\t17;17;17\tvariants_only1:1:1:I5A:.:Ref and reads have variant so report\tGeneric description of variants_only1',
]
self.assertEqual(expected, c.report_lines)
shutil.rmtree(tmpdir)
@@ -259,7 +277,7 @@ class TestCluster(unittest.TestCase):
c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=112, total_reads_bases=1080)
c.run()
expected = [
- 'gene\t1\t0\t27\t112\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t364\t27.0\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of gene'
+ 'gene\tgene\t1\t0\t27\t112\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t364\t27.0\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of gene'
]
self.assertEqual(expected, c.report_lines)
shutil.rmtree(tmpdir)
@@ -275,7 +293,7 @@ class TestCluster(unittest.TestCase):
c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
c.run()
expected = [
- 'ref_gene\t1\t0\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tA\t32,31\t.\tGeneric description of ref_gene'
+ 'ref_gene\tref_gene\t1\t0\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\t.\tGeneric description of ref_gene'
]
self.assertEqual(expected, c.report_lines)
shutil.rmtree(tmpdir)
@@ -293,7 +311,7 @@ class TestCluster(unittest.TestCase):
c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
c.run()
expected = [
- 'ref_gene\t1\t1\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tA\t32,31\t.\tGeneric description of ref_gene'
+ 'ref_gene\tref_gene\t1\t1\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\t.\tGeneric description of ref_gene'
]
self.assertEqual(expected, c.report_lines)
shutil.rmtree(tmpdir)
@@ -312,7 +330,7 @@ class TestCluster(unittest.TestCase):
# We shouldn't get an extra 'HET' line because we already know about the snp, so
# included in the report of the known snp
expected = [
- 'ref_gene\t1\t0\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tp\tM6I\t0\t.\t.\t16\t18\tA;T;G\t135\t137\tA;T;G\t65;64;63\t.;.;A\t65;64;32,31\tref_gene:1:0:M6I:.:Description of M6I snp\t.'
+ 'ref_gene\tref_gene\t1\t0\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tp\tM6I\t0\t.\t.\t16\t18\tATG\t135\t137\tATG\t65;64;63\tA;T;G,A\t65;64;32,31\tref_gene:1:0:M6I:.:Description of M6I snp\t.'
]
self.assertEqual(expected, c.report_lines)
shutil.rmtree(tmpdir)
@@ -331,7 +349,7 @@ class TestCluster(unittest.TestCase):
# We shouldn't get an extra 'HET' line because we already know about the snp, so
# included in the report of the known snp
expected = [
- 'ref_gene\t1\t1\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tp\tM6I\t0\t.\t.\t16\t18\tA;T;G\t135\t137\tA;T;G\t65;64;63\t.;.;A\t65;64;32,31\tref_gene:1:1:M6I:.:Description of M6I snp\t.'
+ 'ref_gene\tref_gene\t1\t1\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tp\tM6I\t0\t.\t.\t16\t18\tATG\t135\t137\tATG\t65;64;63\tA;T;G,A\t65;64;32,31\tref_gene:1:1:M6I:.:Description of M6I snp\t.'
]
self.assertEqual(expected, c.report_lines)
shutil.rmtree(tmpdir)
@@ -350,7 +368,7 @@ class TestCluster(unittest.TestCase):
# We shouldn't get an extra 'HET' line because we already know about the snp, so
# included in the report of the known snp
expected = [
- 'ref_gene\t1\t1\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tp\tI6M\t1\t.\t.\t16\t18\tA;T;G\t135\t137\tA;T;G\t65;64;63\t.;.;A\t65;64;32,31\tref_gene:1:1:I6M:.:Description of I6M snp\t.'
+ 'ref_gene\tref_gene\t1\t1\t155\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tp\tI6M\t1\t.\t.\t16\t18\tATG\t135\t137\tATG\t65;64;63\tA;T;G,A\t65;64;32,31\tref_gene:1:1:I6M:.:Description of I6M snp\t.'
]
self.assertEqual(expected, c.report_lines)
shutil.rmtree(tmpdir)
@@ -366,7 +384,7 @@ class TestCluster(unittest.TestCase):
c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
c.run()
expected = [
- 'ref_seq\t0\t0\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tA\t32,31\t.\tGeneric description of ref_seq'
+ 'ref_seq\tref_seq\t0\t0\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\t.\tGeneric description of ref_seq'
]
self.assertEqual(expected, c.report_lines)
shutil.rmtree(tmpdir)
@@ -382,7 +400,7 @@ class TestCluster(unittest.TestCase):
c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
c.run()
expected = [
- 'ref_seq\t0\t0\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tn\tG18A\t0\t.\t.\t18\t18\tG\t137\t137\tG\t63\tA\t32,31\tref_seq:0:0:G18A:.:Description of G18A\tGeneric description of ref_seq'
+ 'ref_seq\tref_seq\t0\t0\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tn\tG18A\t0\t.\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\tref_seq:0:0:G18A:.:Description of G18A\tGeneric description of ref_seq'
]
self.assertEqual(expected, c.report_lines)
shutil.rmtree(tmpdir)
@@ -398,7 +416,7 @@ class TestCluster(unittest.TestCase):
c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=148, total_reads_bases=13320)
c.run()
expected = [
- 'ref_seq\t0\t1\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tA\t32,31\t.\tGeneric description of ref_seq'
+ 'ref_seq\tref_seq\t0\t1\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t0\tHET\t.\t.\t.\tG18A\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\t.\tGeneric description of ref_seq'
]
self.assertEqual(expected, c.report_lines)
shutil.rmtree(tmpdir)
@@ -417,7 +435,7 @@ class TestCluster(unittest.TestCase):
# We shouldn't get an extra 'HET' line because we already know about the snp, so
# included in the report of the known snp
expected = [
- 'ref_seq\t0\t0\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tn\tG18A\t0\t.\t.\t18\t18\tG\t137\t137\tG\t63\tA\t32,31\tref_seq:0:0:G18A:.:Description of G18A snp\t.'
+ 'ref_seq\tref_seq\t0\t0\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tn\tG18A\t0\t.\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\tref_seq:0:0:G18A:.:Description of G18A snp\t.'
]
self.assertEqual(expected, c.report_lines)
shutil.rmtree(tmpdir)
@@ -436,7 +454,7 @@ class TestCluster(unittest.TestCase):
# We shouldn't get an extra 'HET' line because we already know about the snp, so
# included in the report of the known snp
expected = [
- 'ref_seq\t0\t1\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tn\tG18A\t0\t.\t.\t18\t18\tG\t137\t137\tG\t63\tA\t32,31\tref_seq:0:1:G18A:.:Description of G18A snp\t.'
+ 'ref_seq\tref_seq\t0\t1\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tn\tG18A\t0\t.\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\tref_seq:0:1:G18A:.:Description of G18A snp\t.'
]
self.assertEqual(expected, c.report_lines)
shutil.rmtree(tmpdir)
@@ -455,7 +473,7 @@ class TestCluster(unittest.TestCase):
# We shouldn't get an extra 'HET' line because we already know about the snp, so
# included in the report of the known snp
expected = [
- 'ref_seq\t0\t1\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tn\tA18G\t1\t.\t.\t18\t18\tG\t137\t137\tG\t63\tA\t32,31\tref_seq:0:1:A18G:.:Description of A18G snp\t.'
+ 'ref_seq\tref_seq\t0\t1\t147\t148\tcluster_name\t96\t96\t100.0\tcluster_name.scaffold.1\t335\t39.8\t1\tSNP\tn\tA18G\t1\t.\t.\t18\t18\tG\t137\t137\tG\t63\tG,A\t32,31\tref_seq:0:1:A18G:.:Description of A18G snp\t.'
]
self.assertEqual(expected, c.report_lines)
shutil.rmtree(tmpdir)
@@ -472,8 +490,59 @@ class TestCluster(unittest.TestCase):
c.run()
expected = [
- 'presence_absence1\t1\t0\t19\t278\tcluster_name\t96\t77\t100.0\tcluster_name.scaffold.1\t949\t20.5\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of presence_absence1'
+ 'presence_absence1\tpresence_absence1\t1\t0\t19\t278\tcluster_name\t96\t77\t100.0\tcluster_name.scaffold.1\t949\t20.5\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tGeneric description of presence_absence1'
+ ]
+ self.assertEqual(expected, c.report_lines)
+ shutil.rmtree(tmpdir)
+
+
+ def test_full_run_multiple_vars_in_codon(self):
+ '''Test complete run where there is a codon with a SNP and an indel'''
+ fasta_in = os.path.join(data_dir, 'cluster_test_full_run_multiple_vars.fa')
+ tsv_in = os.path.join(data_dir, 'cluster_test_full_run_multiple_vars.tsv')
+ refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
+ tmpdir = 'tmp.cluster_test_full_run_multiple_vars'
+ shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_multiple_vars'), tmpdir)
+ c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=292, total_reads_bases=20900)
+ c.run()
+
+ expected = [
+ 'presence_absence1\tpresence_absence1\t1\t0\t539\t292\tcluster_name\t96\t96\t96.91\tcluster_name.scaffold.1\t1074\t20.4\t0\t.\tp\t.\t0\t.\tMULTIPLE\t25\t26\tGA\t487\t489\tCAT\t27;26;25\tC;A;T\t27;26;25\t.\tGeneric description of presence_absence1',
+ 'presence_absence1\tpresence_absence1\t1\t0\t539\t292\tcluster_name\t96\t96\t96.91\tcluster_name.scaffold.1\t1074\t20.4\t0\t.\tp\t.\t0\tA10fs\tFSHIFT\t28\t28\tG\t491\t491\tG\t26\tG\t26\t.\tGeneric description of presence_absence1',
]
self.assertEqual(expected, c.report_lines)
shutil.rmtree(tmpdir)
+
+ def test_full_run_delete_codon(self):
+ '''Test complete run where there is a deleted codon'''
+ fasta_in = os.path.join(data_dir, 'cluster_test_full_run_delete_codon.fa')
+ tsv_in = os.path.join(data_dir, 'cluster_test_full_run_delete_codon.tsv')
+ refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
+ tmpdir = 'tmp.cluster_test_full_delete_codon'
+ shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_delete_codon'), tmpdir)
+ c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=292, total_reads_bases=20900)
+ c.run()
+
+ expected = [
+ 'presence_absence1\tpresence_absence1\t1\t0\t539\t292\tcluster_name\t117\t117\t92.31\tcluster_name.scaffold.1\t1104\t20.0\t0\t.\tp\t.\t0\tR25_A26del\tDEL\t73\t73\tA\t553\t553\tA\t27\tA\t27\t.\tGeneric description of presence_absence1',
+ ]
+ self.assertEqual(expected, c.report_lines)
+ shutil.rmtree(tmpdir)
+
+
+ def test_full_run_insert_codon(self):
+ '''Test complete run where there is an inserted codon'''
+ fasta_in = os.path.join(data_dir, 'cluster_test_full_run_insert_codon.fa')
+ tsv_in = os.path.join(data_dir, 'cluster_test_full_run_insert_codon.tsv')
+ refdata = reference_data.ReferenceData([fasta_in], [tsv_in])
+ tmpdir = 'tmp.cluster_test_full_insert_codon'
+ shutil.copytree(os.path.join(data_dir, 'cluster_test_full_run_insert_codon'), tmpdir)
+ c = cluster.Cluster(tmpdir, 'cluster_name', refdata, spades_other_options='--only-assembler', total_reads=292, total_reads_bases=20900)
+ c.run()
+
+ expected = [
+ 'presence_absence1\tpresence_absence1\t1\t0\t539\t292\tcluster_name\t108\t108\t92.31\tcluster_name.scaffold.1\t1115\t19.9\t0\t.\tp\t.\t0\tS25_M26insELI\tINS\t73\t73\tA\t554\t554\tG\t24\tG\t24\t.\tGeneric description of presence_absence1'
+ ]
+ self.assertEqual(expected, c.report_lines)
+ shutil.rmtree(tmpdir)
diff --git a/ariba/tests/clusters_test.py b/ariba/tests/clusters_test.py
index a107003..1056129 100644
--- a/ariba/tests/clusters_test.py
+++ b/ariba/tests/clusters_test.py
@@ -24,6 +24,7 @@ class TestClusters(unittest.TestCase):
self.refdata_dir = 'tmp.RefData'
os.mkdir(self.refdata_dir)
shutil.copyfile(os.path.join(data_dir, 'clusters_test_dummy_db.fa'), os.path.join(self.refdata_dir, '02.cdhit.all.fa'))
+ shutil.copyfile(os.path.join(data_dir, 'clusters_test_dummy_db.fa.msh'), os.path.join(self.refdata_dir, '02.cdhit.all.fa.msh'))
shutil.copyfile(os.path.join(data_dir, 'clusters_test_dummy_db.tsv'), os.path.join(self.refdata_dir, '01.filter.check_metadata.tsv'))
with open(os.path.join(self.refdata_dir, '00.info.txt'), 'w') as f:
print('genetic_code\t11', file=f)
@@ -163,6 +164,16 @@ class TestClusters(unittest.TestCase):
bin_size = 10
inprefix = os.path.join(data_dir, 'clusters_test_load_minimap_files')
got_clster2rep, got_cluster_read_count, got_cluster_base_count, got_insert_hist, got_proper_pairs = clusters.Clusters._load_minimap_files(inprefix, bin_size)
+ expected_clster2rep = {'1': 'ref2', '2': 'ref42'}
+ expected_cluster_read_count = {'1': 42, '2': 43}
+ expected_cluster_base_count = {'1': 4242, '2': 4343}
+ expected_insert_hist_bins = {80: 3, 90: 20, 100: 7, 110: 3}
+ expected_proper_pairs = 42424242
+ self.assertEqual(expected_clster2rep, got_clster2rep)
+ self.assertEqual(expected_cluster_read_count, got_cluster_read_count)
+ self.assertEqual(expected_cluster_base_count, got_cluster_base_count)
+ self.assertEqual(expected_insert_hist_bins, got_insert_hist.bins)
+ self.assertEqual(expected_proper_pairs, got_proper_pairs)
def test_set_insert_size_data(self):
diff --git a/ariba/tests/data/cluster_test_full_run_delete_codon.fa b/ariba/tests/data/cluster_test_full_run_delete_codon.fa
new file mode 100644
index 0000000..021f2f2
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_delete_codon.fa
@@ -0,0 +1,3 @@
+>presence_absence1
+ATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACC
+TACGGTGATCAAGTTTATAGAGCTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAA
diff --git a/ariba/tests/data/cluster_test_full_run_delete_codon.tsv b/ariba/tests/data/cluster_test_full_run_delete_codon.tsv
new file mode 100644
index 0000000..da35140
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_delete_codon.tsv
@@ -0,0 +1 @@
+presence_absence1 1 0 . . Generic description of presence_absence1
diff --git a/ariba/tests/data/cluster_test_full_run_delete_codon/for_reads.fa b/ariba/tests/data/cluster_test_full_run_delete_codon/for_reads.fa
new file mode 100644
index 0000000..20af6ce
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_delete_codon/for_reads.fa
@@ -0,0 +1,20 @@
+>presence_absence1
+CTTAATTATATATCCAGTACATCAACGGGGGGACGCTACGTGGTATAGCCGGATTACAGC
+ACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGT
+GTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCT
+GGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCA
+AACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAA
+TTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCA
+TGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAG
+GGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGA
+ATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACC
+TACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAA
+TTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGT
+TCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCAT
+CAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTA
+GAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGA
+TTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGA
+GTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTT
+TCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATCGGTCT
+AGTCTCTGGCACGTCTGACGACTCAATCTCGAGATCGTACACGGCGCTGTAGAGCCATGT
+AATCCCTCTGTTAAAGATCTGGCGACCTTAACAATAGAAA
diff --git a/ariba/tests/data/cluster_test_full_run_delete_codon/reads_1.fq b/ariba/tests/data/cluster_test_full_run_delete_codon/reads_1.fq
new file mode 100644
index 0000000..2269d0b
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_delete_codon/reads_1.fq
@@ -0,0 +1,588 @@
+ at presence_absence1:1:493:567/1
+GAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGTATGCCCAGTCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:2:556:631/1
+ATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:3:707:783/1
+ATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:4:32:108/1
+GACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:5:925:999/1
+ACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:6:700:774/1
+ATTTATCATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:7:770:844/1
+AAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:8:185:261/1
+CCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:9:871:946/1
+AGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:10:484:560/1
+GCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGTATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:11:629:704/1
+CACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:12:810:884/1
+GGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:13:277:351/1
+TTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:14:221:295/1
+ACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:15:778:852/1
+TCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:16:611:685/1
+GGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:17:706:780/1
+CATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:18:25:99/1
+ACGGGGGGACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:19:70:144/1
+GACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:20:409:482/1
+CGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:21:767:841/1
+TAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:22:560:635/1
+CCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:23:83:157/1
+GATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:24:530:606/1
+GCACCTACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:25:485:561/1
+CGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGTATGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:26:409:483/1
+CGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:27:553:627/1
+AGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:28:529:599/1
+AGCACCTACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:29:79:155/1
+CAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:30:749:822/1
+CTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:31:882:956/1
+CACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:32:82:157/1
+TGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:33:578:652/1
+AACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:34:299:373/1
+AATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:35:267:342/1
+AGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:36:80:153/1
+AGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:37:320:394/1
+GCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:38:616:689/1
+ACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:39:707:781/1
+ATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:40:30:104/1
+GGGACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:41:249:324/1
+GTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:42:264:337/1
+GTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:43:441:515/1
+CAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:44:553:628/1
+AGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:45:641:716/1
+GCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:46:385:460/1
+CCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:47:46:121/1
+TAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:48:609:683/1
+CAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:49:169:243/1
+AACACTAAACCTGGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:50:601:677/1
+ATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:51:295:372/1
+TGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:52:347:422/1
+GAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:53:382:456/1
+ACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:54:92:167/1
+ATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:55:654:729/1
+CTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATTCTCTTATTTGATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:56:524:598/1
+AGCGCAGCACCTACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:57:742:816/1
+ATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:58:178:253/1
+CCTGGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:59:755:828/1
+TCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:60:200:274/1
+GACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:61:425:499/1
+TAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:62:470:546/1
+GGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:63:198:273/1
+CCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:64:692:766/1
+ATATACAAATTTATCATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:65:543:616/1
+TCAAGTTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:66:161:235/1
+TCCTCCTAAACACTAAACCTGGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:67:908:984/1
+TTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:68:729:805/1
+CGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:69:798:873/1
+CGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:70:760:835/1
+CTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:71:135:210/1
+CTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTGGTTCCGCTAAAATGAACCGACTATAATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:72:443:519/1
+GGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:73:1:75/1
+CTTAATTATATATCCAGTACATCAACGGGGGGACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:74:400:474/1
+CGCAGTACACGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:75:44:118/1
+TATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:76:865:937/1
+TCTACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:77:702:776/1
+TTATCATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:78:199:272/1
+CGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:79:58:132/1
+AGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:80:772:848/1
+GTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:81:748:823/1
+ACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:82:510:583/1
+ACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:83:862:935/1
+TAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:84:324:399/1
+CACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:85:489:563/1
+CGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGTATGCCCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:86:637:711/1
+ATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:87:451:526/1
+AGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:88:713:787/1
+ATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:89:597:671/1
+GATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:90:246:318/1
+CTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:91:658:733/1
+TTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATTCTCTTATTTGATGCGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:92:624:700/1
+CAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:93:275:349/1
+GCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:94:497:572/1
+TTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:95:204:277/1
+ATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:96:427:502/1
+GGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:97:550:624/1
+TATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:98:188:262/1
+CTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:99:953:1030/1
+CAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATCGGTCTAGTCTCTGGCACGTCTGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:100:924:1000/1
+GACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:101:236:310/1
+GGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:102:67:141/1
+CCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:103:750:825/1
+TAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:104:746:822/1
+GTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:105:345:419/1
+TCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:106:225:300/1
+TAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:107:254:329/1
+ATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:108:783:856/1
+ATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:109:214:290/1
+TAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:110:499:575/1
+GGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:111:439:513/1
+CCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:112:452:525/1
+GCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:113:196:271/1
+AACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:114:68:142/1
+CTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:115:879:956/1
+GCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:116:211:286/1
+TGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:117:570:645/1
+TGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:118:465:539/1
+AAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:119:701:776/1
+TTTATCATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:120:715:789/1
+TCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:121:124:198/1
+CACGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTGGTTCCGCTAAAATGAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:122:641:715/1
+GCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:123:368:441/1
+TTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:124:654:728/1
+CTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATTCTCTTATTTGATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:125:901:976/1
+TAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:126:178:252/1
+CCTGGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:127:825:900/1
+TAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:128:10:85/1
+ATATCCAGTACATCAACGGGGGGACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:129:116:189/1
+GACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTGGTTCCGCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:130:145:221/1
+CTCACGATCATCCATGTCCTCCTAAACACTAAACCTGGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:131:526:602/1
+CGCAGCACCTACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:132:934:1009/1
+GTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATCGGTCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:133:893:969/1
+CTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:134:774:849/1
+TGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:135:763:836/1
+AGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:136:469:544/1
+AGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:137:905:977/1
+CCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:138:108:183/1
+CTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:139:782:855/1
+CATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:140:811:886/1
+GTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:141:383:457/1
+CACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:142:331:405/1
+CAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:143:719:794/1
+TTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:144:847:921/1
+GTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:145:330:404/1
+GCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:146:757:831/1
+AGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:147:899:974/1
+CGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/tests/data/cluster_test_full_run_delete_codon/reads_2.fq b/ariba/tests/data/cluster_test_full_run_delete_codon/reads_2.fq
new file mode 100644
index 0000000..487bff5
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_delete_codon/reads_2.fq
@@ -0,0 +1,588 @@
+ at presence_absence1:1:493:567/2
+CCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:2:556:631/2
+ATAAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:3:707:783/2
+GAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:4:32:108/2
+CCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:5:925:999/2
+GGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:6:700:774/2
+ACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGTAGATACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:7:770:844/2
+CATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:8:185:261/2
+TATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:9:871:946/2
+GTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:10:484:560/2
+TCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:11:629:704/2
+ATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:12:810:884/2
+CCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:13:277:351/2
+ACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:14:221:295/2
+AAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:15:778:852/2
+GTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:16:611:685/2
+TCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:17:706:780/2
+CAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:18:25:99/2
+GTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCTAGAGCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:19:70:144/2
+AATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:20:409:482/2
+TACTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGCACGCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:21:767:841/2
+CAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:22:560:635/2
+GATGATAAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:23:83:157/2
+AGGGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:24:530:606/2
+CCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:25:485:561/2
+GTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:26:409:483/2
+ATACTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGCACGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:27:553:627/2
+ATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:28:529:599/2
+AGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:29:79:155/2
+GGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:30:749:822/2
+ACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:31:882:956/2
+GTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:32:82:157/2
+AGGGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:33:578:652/2
+TCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:34:299:373/2
+CAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:35:267:342/2
+TGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:36:80:153/2
+TTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:37:320:394/2
+GTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:38:616:689/2
+TCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:39:707:781/2
+GCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:40:30:104/2
+GTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:41:249:324/2
+CGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:42:264:337/2
+TCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:43:441:515/2
+ATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:44:553:628/2
+AATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:45:641:716/2
+ATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:46:385:460/2
+GGTGCTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:47:46:121/2
+CATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:48:609:683/2
+TGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:49:169:243/2
+TATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATACCACCAGTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:50:601:677/2
+TAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:51:295:372/2
+AACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:52:347:422/2
+ATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:53:382:456/2
+CTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:54:92:167/2
+TTGGCCATCTAGGGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:55:654:729/2
+ATTGCGTACCTTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:56:524:598/2
+GTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:57:742:816/2
+ACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:58:178:253/2
+TGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:59:755:828/2
+TACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:60:200:274/2
+TCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:61:425:499/2
+AACAAACAGACTGGGCATACTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATATGGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:62:470:546/2
+AGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:63:198:273/2
+CGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:64:692:766/2
+GTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGTAGATACAACTTCTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:65:543:616/2
+CGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:66:161:235/2
+TCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATACCACCAGTTGTTTGGCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:67:908:984/2
+ACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:68:729:805/2
+CGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:69:798:873/2
+AACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:70:760:835/2
+AATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:71:135:210/2
+CTGCCAAAGCCGGCTGCTCACAAATGGGAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:72:443:519/2
+ATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:73:1:75/2
+GTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCTAGAGCCAAGAGGATTAGTGAATCACTGATTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:74:400:474/2
+ACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGCACGCCATTCTGCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:75:44:118/2
+TTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:76:865:937/2
+ACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:77:702:776/2
+ATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGTAGATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:78:199:272/2
+GAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:79:58:132/2
+TATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:80:772:848/2
+GTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:81:748:823/2
+CACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:82:510:583/2
+ATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:83:862:935/2
+TAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:84:324:399/2
+GGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:85:489:563/2
+TGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:86:637:711/2
+AATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:87:451:526/2
+AATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAACTTGATCACCGTAGGTGCTGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:88:713:787/2
+GCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:89:597:671/2
+CGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCCGCGGGAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:90:246:318/2
+GCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:91:658:733/2
+ACAGATTGCGTACCTTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:92:624:700/2
+AACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:93:275:349/2
+ACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:94:497:572/2
+TCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:95:204:277/2
+TACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:96:427:502/2
+TTCAACAAACAGACTGGGCATACTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:97:550:624/2
+TGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:98:188:262/2
+TTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:99:953:1030/2
+TATTGTTAAGGTCGCCAGATCTTTAACAGAGGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:100:924:1000/2
+GGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:101:236:310/2
+TGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:102:67:141/2
+TAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:103:750:825/2
+GACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:104:746:822/2
+ACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:105:345:419/2
+CATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:106:225:300/2
+GGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:107:254:329/2
+TGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:108:783:856/2
+GTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:109:214:290/2
+GGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:110:499:575/2
+AACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:111:439:513/2
+TATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:112:452:525/2
+ATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAACTTGATCACCGTAGGTGCTGCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:113:196:271/2
+AATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:114:68:142/2
+TTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:115:879:956/2
+GTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:116:211:286/2
+TGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:117:570:645/2
+AGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:118:465:539/2
+CCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAACTTGATCAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:119:701:776/2
+ATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGTAGATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:120:715:789/2
+TAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:121:124:198/2
+GCTGCTCACAAATGGGAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACACTATTATAGTCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:122:641:715/2
+TTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:123:368:441/2
+ATCAGTTTAATATGGCCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:124:654:728/2
+TTGCGTACCTTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:125:901:976/2
+GTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:126:178:252/2
+GTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:127:825:900/2
+GAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:128:10:85/2
+ATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCTAGAGCCAAGAGGATTAGTGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:129:116:189/2
+AAATGGGAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:130:145:221/2
+ATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:131:526:602/2
+GGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:132:934:1009/2
+TTTAACAGAGGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:133:893:969/2
+ATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:134:774:849/2
+CGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:135:763:836/2
+CAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:136:469:544/2
+GGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAACTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:137:905:977/2
+CGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:138:108:183/2
+GAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:139:782:855/2
+TGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:140:811:886/2
+GACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:141:383:457/2
+GCTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:142:331:405/2
+CTGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:143:719:794/2
+TAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:144:847:921/2
+GGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:145:330:404/2
+TGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:146:757:831/2
+GATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:147:899:974/2
+GTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/tests/data/cluster_test_full_run_delete_codon/references.fa b/ariba/tests/data/cluster_test_full_run_delete_codon/references.fa
new file mode 100644
index 0000000..021f2f2
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_delete_codon/references.fa
@@ -0,0 +1,3 @@
+>presence_absence1
+ATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACC
+TACGGTGATCAAGTTTATAGAGCTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAA
diff --git a/ariba/tests/data/cluster_test_full_run_insert_codon.fa b/ariba/tests/data/cluster_test_full_run_insert_codon.fa
new file mode 100644
index 0000000..be61945
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_insert_codon.fa
@@ -0,0 +1,3 @@
+>presence_absence1
+ATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACC
+TACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAA
diff --git a/ariba/tests/data/cluster_test_full_run_insert_codon.tsv b/ariba/tests/data/cluster_test_full_run_insert_codon.tsv
new file mode 100644
index 0000000..da35140
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_insert_codon.tsv
@@ -0,0 +1 @@
+presence_absence1 1 0 . . Generic description of presence_absence1
diff --git a/ariba/tests/data/cluster_test_full_run_insert_codon/for_reads.fa b/ariba/tests/data/cluster_test_full_run_insert_codon/for_reads.fa
new file mode 100644
index 0000000..3a4a124
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_insert_codon/for_reads.fa
@@ -0,0 +1,20 @@
+>presence_absence1
+CTTAATTATATATCCAGTACATCAACGGGGGGACGCTACGTGGTATAGCCGGATTACAGC
+ACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGT
+GTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCT
+GGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCA
+AACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAA
+TTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCA
+TGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAG
+GGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGA
+ATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACC
+TACGGTGATCAAGTTTATAGAGCTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAA
+TTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGT
+TCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCAT
+CAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTA
+GAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGA
+TTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGA
+GTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTT
+TCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATCGGTCT
+AGTCTCTGGCACGTCTGACGACTCAATCTCGAGATCGTACACGGCGCTGTAGAGCCATGT
+AATCCCTCTGTTAAAGATCTGGCGACCTTAACAATAGAAA
diff --git a/ariba/tests/data/cluster_test_full_run_insert_codon/reads_1.fq b/ariba/tests/data/cluster_test_full_run_insert_codon/reads_1.fq
new file mode 100644
index 0000000..160a42e
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_insert_codon/reads_1.fq
@@ -0,0 +1,592 @@
+ at presence_absence1:1:79:154/1
+CAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:2:904:979/1
+GTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:3:869:942/1
+GCTAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:4:238:314/1
+CCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:5:202:276/1
+CTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:6:560:635/1
+ATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:7:321:394/1
+CGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:8:795:871/1
+ACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:9:489:563/1
+CGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGAGCTTATAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:10:789:865/1
+TACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:11:292:367/1
+CGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:12:381:457/1
+TACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:13:673:747/1
+GGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATTCTCTTATTTGATGCGGACTGGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:14:163:239/1
+CTCCTAAACACTAAACCTGGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:15:69:143/1
+TGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:16:812:888/1
+TCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:17:822:895/1
+ACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:18:61:138/1
+ACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:19:969:1041/1
+CAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATCGGTCTAGTCTCTGGCACGTCTGACGACTCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:20:880:955/1
+AGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:21:941:1015/1
+AAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATCGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:22:67:141/1
+CCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:23:470:546/1
+GGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:24:482:555/1
+TGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:25:714:790/1
+TCATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:26:857:932/1
+TATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:27:312:386/1
+ACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:28:26:99/1
+CGGGGGGACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:29:307:381/1
+AGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:30:587:661/1
+AACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:31:734:808/1
+GATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:32:657:730/1
+TTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATTCTCTTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:33:584:657/1
+AAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:34:239:314/1
+CAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:35:912:987/1
+ATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:36:352:426/1
+AAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:37:622:695/1
+GAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:38:514:587/1
+ATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGAGCTTATAGTATGCCCAGTCTGTTTGTTGAAAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:39:1:76/1
+CTTAATTATATATCCAGTACATCAACGGGGGGACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:40:406:481/1
+ACACGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:41:13:88/1
+TCCAGTACATCAACGGGGGGACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:42:246:321/1
+CTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:43:107:181/1
+GCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:44:425:500/1
+TAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:45:255:329/1
+TTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:46:126:201/1
+CGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTGGTTCCGCTAAAATGAACCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:47:458:532/1
+ACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:48:352:427/1
+AAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:49:303:378/1
+TACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:50:640:715/1
+CGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:51:598:672/1
+TTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:52:865:938/1
+TCACGCTAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:53:938:1012/1
+ACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:54:237:311/1
+GCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:55:626:701/1
+CCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:56:487:561/1
+TGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGAGCTTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:57:458:533/1
+ACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:58:360:434/1
+ATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGTAGGGTCAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:59:892:966/1
+ACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:60:327:402/1
+AACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:61:655:728/1
+AGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATTCTCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:62:944:1020/1
+TCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATCGGTCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:63:281:355/1
+GCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:64:876:950/1
+TACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:65:29:103/1
+GGGGACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:66:439:514/1
+CCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:67:843:916/1
+CCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:68:48:121/1
+GCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:69:641:717/1
+GGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:70:448:524/1
+ACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:71:472:545/1
+CCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:72:298:373/1
+GAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:73:84:158/1
+ATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:74:473:547/1
+CTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:75:115:190/1
+GGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTGGTTCCGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:76:803:877/1
+GGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:77:241:316/1
+AACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:78:138:213/1
+CTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTGGTTCCGCTAAAATGAACCGACTATAATAGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:79:845:918/1
+TAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:80:492:567/1
+TGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTATAGAGCTTATAGTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:81:908:983/1
+CGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:82:624:699/1
+AACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:83:235:310/1
+TGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:84:823:896/1
+CCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:85:283:358/1
+AGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:86:255:331/1
+TTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:87:72:148/1
+CCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:88:615:688/1
+GTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:89:936:1009/1
+ACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:90:711:784/1
+TTATCATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:91:590:666/1
+TGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:92:747:820/1
+TGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:93:234:309/1
+ATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:94:325:399/1
+ACAACGCAATAAAAGATCATTCGAGTAAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:95:214:287/1
+TAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATTTGTGAGCAGCCGGCTTTGGCAGGCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:96:155:230/1
+TCCATGTCCTCCTAAACACTAAACCTGGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:97:351:427/1
+AAAGAGACCATGCCGGCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:98:734:809/1
+GATGCGGACTGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:99:756:830/1
+TACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:100:266:341/1
+GAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:101:639:714/1
+ACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:102:278:353/1
+TTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:103:265:341/1
+TGAGCAGCCGGCTTTGGCAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:104:394:468/1
+ACTCGGCGCAGTACACGACTTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:105:918:993/1
+TGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:106:818:891/1
+GGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:107:745:818/1
+GGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:108:282:356/1
+CAGGCAGTGTCGATGTGAATTTACCAGAGCACGATATAGCGCCACAACGCAATAAAAGATCATTCGAGTAAAGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:109:942:1016/1
+AGTCCCTCACAAGTTTTCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATCGGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:110:562:637/1
+AGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:111:778:854/1
+GAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:112:445:519/1
+TTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:113:366:440/1
+GCTTGTACCAAAAGTTACACCCTATGCCACTCGGCGCAGTACACGACTTCAATAGGGTGTAGGGTCAGCCCGTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:114:413:488/1
+TTCAATAGGGTGTAGGGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:115:564:638/1
+TATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:116:774:850/1
+CTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:117:101:175/1
+GCTCTAGCTCAGAGGGACGTGTCCACGCGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:118:958:1033/1
+TCGCCAATGGTCAATATTGCTTACTCGTACCTCCTGCTCATGTGACCTCACTATCGGTCTAGTCTCTGGCACGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:119:128:203/1
+CGTAAGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTGGTTCCGCTAAAATGAACCGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:120:860:934/1
+TTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:121:672:747/1
+CGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATTCTCTTATTTGATGCGGACTGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:122:676:750/1
+ACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATTCTCTTATTTGATGCGGACTGGGTGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:123:188:262/1
+CTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:124:186:260/1
+CGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:125:647:722/1
+TTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:126:743:818/1
+TGGGTGAGATCGGTACTAATCTCAGACTGAGCTTAGAAGTTGTATCTACATTACAATGAAGGTACGCAATCTGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:127:477:552/1
+CAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAAGTTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:128:793:868/1
+TTACAATGAAGGTACGCAATCTGTTGGGTACCAAGCAAATTTAGATTGGACCTAAACTACCTTGTATTTTGCTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:129:33:106/1
+ACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:130:187:261/1
+GCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGGTATTCCCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:131:681:756/1
+CTCCCGCGGCCTAGATACGGATATACAAATTTATCATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:132:899:973/1
+TGGCTGTGTCGTAATCCATTGCGTTGATGGAACGGACACACAAAGTCCCTCACAAGTTTTCGCCAATGGTCAATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:133:472:547/1
+CCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTACGGTGATCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:134:36:111/1
+CTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAATCCTCTTGGCTCTAGCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:135:617:693/1
+GCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTATTTCGCCGGAACTGACTCCCGCGGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:136:832:905/1
+TTTAGATTGGACCTAAACTACCTTGTATTTTGCTCACGCTAGTCTACCAGAGAAACGCACACGGGAGTGGCTGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:137:168:242/1
+AAACACTAAACCTGGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:138:428:503/1
+GGTCAGCCCGTCCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:139:19:92/1
+ACATCAACGGGGGGACGCTACGTGGTATAGCCGGATTACAGCACAGGACCTGACCATAATCAGTGATTCACTAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:140:132:204/1
+AGGCTTCTTATGTCTCACGATCATCCATGTCCTCCTAAACACTAAACCTGGTTCCGCTAAAATGAACCGACTATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:141:538:613/1
+GGTGATCAAGTTTATAGAGCTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATACGGATTATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:142:690:763/1
+CCTAGATACGGATATACAAATTTATCATCAATATTCTCTTATTTGATGCGGACTGGGTGAGATCGGTACTAATCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:143:439:514.dup.2/1
+CCCAGGTTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:144:592:666/1
+CAATAATTAATACGGATTATTGGGTGCAGGGAAACCCTCTGCAACGCACGGACCATTGGCAGGAGTTCGGACTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:145:445:519.dup.2/1
+TTGACGAGCAACAACGTCCAAAACAGGCCTAGCAGAATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:146:530:605/1
+GCACCTACGGTGATCAAGTTTATAGAGCTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAATTAATAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:147:521:595/1
+CGCAGCGCAGCACCTACGGTGATCAAGTTTATAGAGCTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:148:178:250/1
+CCTGGTTCCGCTAAAATGAACCGACTATAATAGTGTTAATTAAACAATAACCCTAGATGGCCAAACAACTGGTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/tests/data/cluster_test_full_run_insert_codon/reads_2.fq b/ariba/tests/data/cluster_test_full_run_insert_codon/reads_2.fq
new file mode 100644
index 0000000..33460df
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_insert_codon/reads_2.fq
@@ -0,0 +1,592 @@
+ at presence_absence1:1:79:154/2
+GTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:2:904:979/2
+GATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:3:869:942/2
+GACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGGGACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:4:238:314/2
+AGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:5:202:276/2
+ACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:6:560:635/2
+TTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:7:321:394/2
+GTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:8:795:871/2
+GACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:9:489:563/2
+CGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:10:789:865/2
+GTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:11:292:367/2
+GGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:12:381:457/2
+GCTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:13:673:747/2
+ACCCAACAGATTGCGTACCTTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:14:163:239/2
+GTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATACCACCAGTTGTTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:15:69:143/2
+ATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:16:812:888/2
+GGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:17:822:895/2
+GACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:18:61:138/2
+CACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:19:969:1041/2
+TCTATTGTTAAGGTCGCCAGATCTTTAACAGAGGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:20:880:955/2
+GTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:21:941:1015/2
+AACAGAGGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:22:67:141/2
+TAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:23:470:546/2
+CTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:24:482:555/2
+AGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:25:714:790/2
+GCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:26:857:932/2
+GAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:27:312:386/2
+CGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:28:26:99/2
+GTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCTAGAGCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:29:307:381/2
+TTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:30:587:661/2
+TCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:31:734:808/2
+TCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:32:657:730/2
+CCTTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:33:584:657/2
+ATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:34:239:314/2
+AGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:35:912:987/2
+CCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:36:352:426/2
+CCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:37:622:695/2
+GTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:38:514:587/2
+CCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:39:1:76/2
+CGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCTAGAGCCAAGAGGATTAGTGAATCACTGATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:40:406:481/2
+TCTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGCACGCCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:41:13:88/2
+GACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCTAGAGCCAAGAGGATTAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:42:246:321/2
+GTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:43:107:181/2
+ATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:44:425:500/2
+GACTGGGCATACTATAAGCTCTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATATGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:45:255:329/2
+TGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:46:126:201/2
+CCGGCTGCTCACAAATGGGAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACACTATTATAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:47:458:532/2
+CCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAACTTGATCACCGTAGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:48:352:427/2
+GCCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:49:303:378/2
+CTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:50:640:715/2
+AGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:51:598:672/2
+CCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:52:865:938/2
+GATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGGGACTTTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:53:938:1012/2
+AGAGGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:54:237:311/2
+GTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:55:626:701/2
+AGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:56:487:561/2
+TTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:57:458:533/2
+TCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAACTTGATCACCGTAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:58:360:434/2
+TAATATGGCCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:59:892:966/2
+AGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:60:327:402/2
+CTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:61:655:728/2
+TTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:62:944:1020/2
+TCTTTAACAGAGGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:63:281:355/2
+CCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:64:876:950/2
+AGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:65:29:103/2
+TTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCTAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:66:439:514/2
+TTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:67:843:916/2
+GAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:68:48:121/2
+CATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:69:641:717/2
+GTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:70:448:524/2
+ATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAACTTGATCACCGTAGGTGCTGCGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:71:472:545/2
+TGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAACTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:72:298:373/2
+CAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:73:84:158/2
+TAGGGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:74:473:547/2
+CCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:75:115:190/2
+CAAATGGGAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:76:803:877/2
+GTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:77:241:316/2
+ATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:78:138:213/2
+TGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:79:845:918/2
+AGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:80:492:567/2
+CGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:81:908:983/2
+GTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:82:624:699/2
+CTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:83:235:310/2
+TGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:84:823:896/2
+TGACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:85:283:358/2
+TGACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:86:255:331/2
+ACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:87:72:148/2
+GTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:88:615:688/2
+ATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:89:936:1009/2
+GGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:90:711:784/2
+TACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGTAGATAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:91:590:666/2
+CCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:92:747:820/2
+CGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:93:234:309/2
+GTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:94:325:399/2
+GGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCCTACACCCTATTGAAGTCGTGTACTGCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:95:214:287/2
+ATGGTCTCTTTACTCGAATGATCTTTTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:96:155:230/2
+TAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATACCACCAGTTGTTTGGCCATCTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:97:351:427/2
+GCCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGGGACGGGCTGACCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:98:734:809/2
+CTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCCAACAGATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:99:756:830/2
+CAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:100:266:341/2
+GAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:101:639:714/2
+GATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:102:278:353/2
+CTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:103:265:341/2
+GAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTCTCTTTACTCGAATGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:104:394:468/2
+TCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGCACGCCATTCTGCTAGGCCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:105:918:993/2
+ACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:106:818:891/2
+ATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:107:745:818/2
+TGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:108:282:356/2
+ACCCTACACCCTATTGAAGTCGTGTACTGCGCCGAGTGGCATAGGGTGTAACTTTTGGTACAAGCCGGCATGGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:109:942:1016/2
+TAACAGAGGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCAGACGTGCCAGAGACTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:110:562:637/2
+AATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:111:778:854/2
+CCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:112:445:519/2
+TGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:113:366:440/2
+TCAGTTTAATATGGCCAAATTCATCGCACGCCATTCTGCTAGGCCTGTTTTGGACGTTGTTGCTCGTCAACCTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:114:413:488/2
+TATAAGCTCTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATATGGCCAAATTCATCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:115:564:638/2
+AAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:116:774:850/2
+CAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:117:101:175/2
+CCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACACTATTATAGTCGGTTCATTTTAGCGGAACCAGGTTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:118:958:1033/2
+TAAGGTCGCCAGATCTTTAACAGAGGGATTACATGGCTCTACAGCGCCGTGTACGATCTCGAGATTGAGTCGTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:119:128:203/2
+AGCCGGCTGCTCACAAATGGGAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACACTATTATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:120:860:934/2
+GTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAATATTGACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:121:672:747/2
+ACCCAACAGATTGCGTACCTTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:122:676:750/2
+GGTACCCAACAGATTGCGTACCTTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:123:188:262/2
+TTATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:124:186:260/2
+ATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:125:647:722/2
+GTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:126:743:818/2
+TGTGCGTTTCTCTGGTAGACTAGCGTGAGCAAAATACAAGGTAGTTTAGGTCCAATCTAAATTTGCTTGGTACCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:127:477:552/2
+GTTTCCCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:128:793:868/2
+TTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACACAGCCACTCCCGTGTGCGTTTCTCTGGTAGACTAGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:129:33:106/2
+AGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:130:187:261/2
+TATTGCGTTGTGGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:131:681:756/2
+TTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAGATTAGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:132:899:973/2
+GAGATTGAGTCGTCAGACGTGCCAGAGACTAGACCGATAGTGAGGTCACATGAGCAGGAGGTACGAGTAAGCAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:133:472:547/2
+CCTGCACCCAATAATCCGTATTAATTATTGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:134:36:111/2
+GAACCAGGTTTAGTGTTTAGGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:135:617:693/2
+CTGAGATTAGTACCGATCTCACCCAGTCCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:136:832:905/2
+AAGCAATATTGACCATTGGCGAAAACTTGTGAGGGACTTTGTGTGTCCGTTCCATCAACGCAATGGATTACGACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:137:168:242/2
+ATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATACCACCAGTTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:138:428:503/2
+ACAGACTGGGCATACTATAAGCTCTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCATCAGTTTAATAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:139:19:92/2
+GGAGGACATGGATGATCGTGAGACATAAGAAGCCTTACGCGTGGACACGTCCCTCTGAGCTAGAGCCAAGAGGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:140:132:204/2
+AAGCCGGCTGCTCACAAATGGGAATACCACCAGTTGTTTGGCCATCTAGGGTTATTGTTTAATTAACACTATTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:141:538:613/2
+GCGGGAGTCAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:142:690:763/2
+TCTAAATTTGCTTGGTACCCAACAGATTGCGTACCTTCATTGTAATGTAGATACAACTTCTAAGCTCAGTCTGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:143:439:514.dup.2/2
+TTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGGTTCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:144:592:666/2
+CCGCATCAAATAAGAGAATATTGATGATAAATTTGTATATCCGTATCTAGGCCGCGGGAGTCAGTTCCGGCGAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:145:445:519.dup.2/2
+TGCAGTTTTTCAACAAACAGACTGGGCATACTATAAGCTCTATAAACTTGATCACCGTAGGTGCTGCGCTGCGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:146:530:605/2
+CAGTTCCGGCGAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:147:521:595/2
+GAAATAGTCCGAACTCCTGCCAATGGTCCGTGCGTTGCAGAGGGTTTCCCTGCACCCAATAATCCGTATTAATTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:148:178:250/2
+GGCGCTATATCGTGCTCTGGTAAATTCACATCGACACTGCCTGCCAAAGCCGGCTGCTCACAAATGGGAATACCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/tests/data/cluster_test_full_run_insert_codon/references.fa b/ariba/tests/data/cluster_test_full_run_insert_codon/references.fa
new file mode 100644
index 0000000..be61945
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_insert_codon/references.fa
@@ -0,0 +1,3 @@
+>presence_absence1
+ATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACC
+TACGGTGATCAAGTTTATAGTATGCCCAGTCTGTTTGTTGAAAAACTGCAATAA
diff --git a/ariba/tests/data/cluster_test_full_run_multiple_vars.fa b/ariba/tests/data/cluster_test_full_run_multiple_vars.fa
new file mode 100644
index 0000000..c7a01d8
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_multiple_vars.fa
@@ -0,0 +1,5 @@
+>presence_absence1
+ATGGATCGCGAAGCGATGACCCATGAAGCGACCGAACGCGCGAGCACCAACATTAGCCAT
+ATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAA
+>presence_absence2
+ATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTAA
diff --git a/ariba/tests/data/cluster_test_full_run_multiple_vars.tsv b/ariba/tests/data/cluster_test_full_run_multiple_vars.tsv
new file mode 100644
index 0000000..0e711a6
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_multiple_vars.tsv
@@ -0,0 +1,2 @@
+presence_absence1 1 0 . . Generic description of presence_absence1
+presence_absence2 1 0 . . Generic description of presence_absence2
diff --git a/ariba/tests/data/cluster_test_full_run_multiple_vars/for_reads.fa b/ariba/tests/data/cluster_test_full_run_multiple_vars/for_reads.fa
new file mode 100644
index 0000000..3d5115f
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_multiple_vars/for_reads.fa
@@ -0,0 +1,20 @@
+>presence_absence1
+GTCTAGGCCATTATCGATCGACAAGTTGCCGTAATTTGCGTAAAATAGCACGAACCCGAA
+ACTTCCTGATATGTCTTGAGTGAATCCATAACCGACCAATTCCCTGCACACGGGAAATCT
+TCACATATAGGACTTCCTTGGGATAAGATAATTGCTCCTATTGGGGTTACACGCCATTAC
+TTTGGGCGCCCCGTATACAGGGAATCACAGATCACCGTGCCTCCAGAGCCGCCCTGGGTT
+GCAGCTGGAACCAGCAAAGTAGGCCTATCACCGACCTCGATAGTCGTTTCCTACGTGTAG
+TTTTAACAGAGGCTTGTTCAATACAGTGGGGCCGGTCCAGGGGCATGTGTAATCCGGTGT
+CGTCACGAGGACAATGTTTGATGACTCTATGATACTCACCGTCAGTACTTCCTCCTCAAT
+GGTGACTGGTTGAAGTCCCTCGTGCCACCATGTTGAAAAGCGGTCGCTTAAAATAGTGAA
+ATGGATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCAT
+ATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAA
+CGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGTGGA
+GCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGA
+ATGACCAGGCGATTCATGGGGGGTCCGTATACTCTCCCGTCTGTTAGGAATTATCTTGCC
+TGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCA
+GTATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCGCCCTTAGACTTCATGATACTA
+CCATAAATGAGGTCTCGTTCCATGATATAGGGAGATGACGAATGCTTGCCAAGACAAATG
+AATTTTCTGGGATGAGGCAGCGGGTCCGGGAGTCACGCCAAACATTGAAAAACTCTCCGA
+CAACAGTTGAAAACTCACTTGTTCATCCCTGACCTTAGCCGACCACCGCTGGGTAATGAG
+CTCGGTACTGTCTGGCCTGGCACGCAAACAAATGGCTTAG
diff --git a/ariba/tests/data/cluster_test_full_run_multiple_vars/reads_1.fq b/ariba/tests/data/cluster_test_full_run_multiple_vars/reads_1.fq
new file mode 100644
index 0000000..52d79ad
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_multiple_vars/reads_1.fq
@@ -0,0 +1,584 @@
+ at presence_absence1:1:768:842/1
+CCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:2:395:471/1
+CTCACCGTCAGTACTTCCTCCTCAATGGTGACTGGTTGAAGTCCCTCGTGCCACCATGTTGAAAAGCGGTCGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:3:732:807/1
+CCCGTCTGTTAGGAATTATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:4:745:820/1
+AATTATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:5:245:320/1
+CTGGAACCAGCAAAGTAGGCCTATCACCGACCTCGATAGTCGTTTCCTACGTGTAGTTTTAACAGAGGCTTGTTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:6:280:354/1
+ATAGTCGTTTCCTACGTGTAGTTTTAACAGAGGCTTGTTCAATACAGTGGGGCCGGTCCAGGGGCATGTGTAATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:7:617:690/1
+CGGCTACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:8:646:721/1
+GGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGACCAGGCGATTCATGGGGGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:9:239:312/1
+TTGCAGCTGGAACCAGCAAAGTAGGCCTATCACCGACCTCGATAGTCGTTTCCTACGTGTAGTTTTAACAGAGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:10:770:844/1
+ACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:11:198:273/1
+CAGGGAATCACAGATCACCGTGCCTCCAGAGCCGCCCTGGGTTGCAGCTGGAACCAGCAAAGTAGGCCTATCACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:12:441:516/1
+CGTGCCACCATGTTGAAAAGCGGTCGCTTAAAATAGTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:13:650:724/1
+CCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGACCAGGCGATTCATGGGGGGTCCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:14:528:601/1
+CAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:15:840:915/1
+AGGAGACGCTTAGGCGCCCTTAGACTTCATGATACTACCATAAATGAGGTCTCGTTCCATGATATAGGGAGATGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:16:588:663/1
+ACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:17:536:610/1
+GCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:18:153:229/1
+TGCTCCTATTGGGGTTACACGCCATTACTTTGGGCGCCCCGTATACAGGGAATCACAGATCACCGTGCCTCCAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:19:427:501/1
+TGGTTGAAGTCCCTCGTGCCACCATGTTGAAAAGCGGTCGCTTAAAATAGTGAAATGGATCGCGAAGCGATGACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:20:472:547/1
+AATAGTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATATTAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:21:498:573/1
+GACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:22:760:836/1
+AGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:23:808:884/1
+CGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCGCCCTTAGACTTCATGATACTACCATAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:24:912:986/1
+TGACGAATGCTTGCCAAGACAAATGAATTTTCTGGGATGAGGCAGCGGGTCCGGGAGTCACGCCAAACATTGAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:25:151:225/1
+ATTGCTCCTATTGGGGTTACACGCCATTACTTTGGGCGCCCCGTATACAGGGAATCACAGATCACCGTGCCTCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:26:322:398/1
+TACAGTGGGGCCGGTCCAGGGGCATGTGTAATCCGGTGTCGTCACGAGGACAATGTTTGATGACTCTATGATACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:27:790:866/1
+ACTAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCGCCCTTAGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:28:642:715/1
+TAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGACCAGGCGATTCATGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:29:944:1018/1
+TGGGATGAGGCAGCGGGTCCGGGAGTCACGCCAAACATTGAAAAACTCTCCGACAACAGTTGAAAACTCACTTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:30:358:432/1
+TGTCGTCACGAGGACAATGTTTGATGACTCTATGATACTCACCGTCAGTACTTCCTCCTCAATGGTGACTGGTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:31:626:700/1
+CTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:32:741:814/1
+TAGGAATTATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:33:419:493/1
+ATGGTGACTGGTTGAAGTCCCTCGTGCCACCATGTTGAAAAGCGGTCGCTTAAAATAGTGAAATGGATCGCGAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:34:140:213/1
+GGGATAAGATAATTGCTCCTATTGGGGTTACACGCCATTACTTTGGGCGCCCCGTATACAGGGAATCACAGATCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:35:443:517/1
+TGCCACCATGTTGAAAAGCGGTCGCTTAAAATAGTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:36:83:158/1
+AATCCATAACCGACCAATTCCCTGCACACGGGAAATCTTCACATATAGGACTTCCTTGGGATAAGATAATTGCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:37:472:548/1
+AATAGTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATATTAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:38:512:586/1
+CCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:39:637:709/1
+GCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGACCAGGCGATTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:40:624:697/1
+TCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:41:454:528/1
+TGAAAAGCGGTCGCTTAAAATAGTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:42:417:492/1
+CAATGGTGACTGGTTGAAGTCCCTCGTGCCACCATGTTGAAAAGCGGTCGCTTAAAATAGTGAAATGGATCGCGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:43:928:1003/1
+AGACAAATGAATTTTCTGGGATGAGGCAGCGGGTCCGGGAGTCACGCCAAACATTGAAAAACTCTCCGACAACAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:44:214:287/1
+ACCGTGCCTCCAGAGCCGCCCTGGGTTGCAGCTGGAACCAGCAAAGTAGGCCTATCACCGACCTCGATAGTCGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:45:811:887/1
+GTACCAGTATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCGCCCTTAGACTTCATGATACTACCATAAATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:46:492:567/1
+AGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:47:875:950/1
+TACCATAAATGAGGTCTCGTTCCATGATATAGGGAGATGACGAATGCTTGCCAAGACAAATGAATTTTCTGGGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:48:913:987/1
+GACGAATGCTTGCCAAGACAAATGAATTTTCTGGGATGAGGCAGCGGGTCCGGGAGTCACGCCAAACATTGAAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:49:721:796/1
+CCGTATACTCTCCCGTCTGTTAGGAATTATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:50:90:163/1
+AACCGACCAATTCCCTGCACACGGGAAATCTTCACATATAGGACTTCCTTGGGATAAGATAATTGCTCCTATTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:51:113:187/1
+GGAAATCTTCACATATAGGACTTCCTTGGGATAAGATAATTGCTCCTATTGGGGTTACACGCCATTACTTTGGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:52:500:574/1
+CCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:53:512:584/1
+CCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:54:781:855/1
+AACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:55:792:867/1
+TAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCGCCCTTAGACTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:56:713:786/1
+TGGGGGGTCCGTATACTCTCCCGTCTGTTAGGAATTATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:57:600:676/1
+CGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:58:564:640/1
+AAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGTGGAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:59:355:429/1
+CGGTGTCGTCACGAGGACAATGTTTGATGACTCTATGATACTCACCGTCAGTACTTCCTCCTCAATGGTGACTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:60:382:458/1
+TGACTCTATGATACTCACCGTCAGTACTTCCTCCTCAATGGTGACTGGTTGAAGTCCCTCGTGCCACCATGTTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:61:658:731/1
+GTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGACCAGGCGATTCATGGGGGGTCCGTATACTCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:62:849:923/1
+TTAGGCGCCCTTAGACTTCATGATACTACCATAAATGAGGTCTCGTTCCATGATATAGGGAGATGACGAATGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:63:623:697/1
+CTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:64:659:735/1
+TTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGACCAGGCGATTCATGGGGGGTCCGTATACTCTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:65:909:984/1
+AGATGACGAATGCTTGCCAAGACAAATGAATTTTCTGGGATGAGGCAGCGGGTCCGGGAGTCACGCCAAACATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:66:498:571/1
+GACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:67:223:298/1
+CCAGAGCCGCCCTGGGTTGCAGCTGGAACCAGCAAAGTAGGCCTATCACCGACCTCGATAGTCGTTTCCTACGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:68:151:226/1
+ATTGCTCCTATTGGGGTTACACGCCATTACTTTGGGCGCCCCGTATACAGGGAATCACAGATCACCGTGCCTCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:69:164:238/1
+GGGTTACACGCCATTACTTTGGGCGCCCCGTATACAGGGAATCACAGATCACCGTGCCTCCAGAGCCGCCCTGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:70:665:740/1
+GTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGACCAGGCGATTCATGGGGGGTCCGTATACTCTCCCGTCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:71:769:842/1
+CACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:72:855:931/1
+GCCCTTAGACTTCATGATACTACCATAAATGAGGTCTCGTTCCATGATATAGGGAGATGACGAATGCTTGCCAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:73:621:694/1
+TACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:74:754:828/1
+GCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:75:542:616/1
+TTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:76:586:662/1
+GAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:77:463:536/1
+GTCGCTTAAAATAGTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:78:449:523/1
+CATGTTGAAAAGCGGTCGCTTAAAATAGTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:79:537:612/1
+CCATATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:80:728:802/1
+CTCTCCCGTCTGTTAGGAATTATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:81:550:624/1
+ATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:82:749:824/1
+ATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTATCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:83:560:635/1
+GGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:84:145:219/1
+AAGATAATTGCTCCTATTGGGGTTACACGCCATTACTTTGGGCGCCCCGTATACAGGGAATCACAGATCACCGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:85:512:588/1
+CCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:86:541:615/1
+ATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:87:422:498/1
+GTGACTGGTTGAAGTCCCTCGTGCCACCATGTTGAAAAGCGGTCGCTTAAAATAGTGAAATGGATCGCGAAGCGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:88:22:97/1
+CAAGTTGCCGTAATTTGCGTAAAATAGCACGAACCCGAAACTTCCTGATATGTCTTGAGTGAATCCATAACCGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:89:819:895/1
+ATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCGCCCTTAGACTTCATGATACTACCATAAATGAGGTCTCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:90:615:690/1
+CTCGGCTACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:91:559:633/1
+TGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:92:177:251/1
+TTACTTTGGGCGCCCCGTATACAGGGAATCACAGATCACCGTGCCTCCAGAGCCGCCCTGGGTTGCAGCTGGAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:93:285:359/1
+CGTTTCCTACGTGTAGTTTTAACAGAGGCTTGTTCAATACAGTGGGGCCGGTCCAGGGGCATGTGTAATCCGGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:94:393:470/1
+TACTCACCGTCAGTACTTCCTCCTCAATGGTGACTGGTTGAAGTCCCTCGTGCCACCATGTTGAAAAGCGGTCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:95:566:640/1
+GCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGTGGAGCAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:96:709:782/1
+TTCATGGGGGGTCCGTATACTCTCCCGTCTGTTAGGAATTATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:97:297:372/1
+GTAGTTTTAACAGAGGCTTGTTCAATACAGTGGGGCCGGTCCAGGGGCATGTGTAATCCGGTGTCGTCACGAGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:98:364:439/1
+CACGAGGACAATGTTTGATGACTCTATGATACTCACCGTCAGTACTTCCTCCTCAATGGTGACTGGTTGAAGTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:99:541:614/1
+ATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:100:196:270/1
+TACAGGGAATCACAGATCACCGTGCCTCCAGAGCCGCCCTGGGTTGCAGCTGGAACCAGCAAAGTAGGCCTATCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:101:750:823/1
+TCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTATCATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:102:192:268/1
+CGTATACAGGGAATCACAGATCACCGTGCCTCCAGAGCCGCCCTGGGTTGCAGCTGGAACCAGCAAAGTAGGCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:103:743:817/1
+GGAATTATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:104:302:376/1
+TTTAACAGAGGCTTGTTCAATACAGTGGGGCCGGTCCAGGGGCATGTGTAATCCGGTGTCGTCACGAGGACAATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:105:456:530/1
+AAAAGCGGTCGCTTAAAATAGTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:106:931:1004/1
+CAAATGAATTTTCTGGGATGAGGCAGCGGGTCCGGGAGTCACGCCAAACATTGAAAAACTCTCCGACAACAGTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:107:654:727/1
+GGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGACCAGGCGATTCATGGGGGGTCCGTATAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:108:260:334/1
+TAGGCCTATCACCGACCTCGATAGTCGTTTCCTACGTGTAGTTTTAACAGAGGCTTGTTCAATACAGTGGGGCCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:109:468:542/1
+TTAAAATAGTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:110:558:633/1
+GTGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:111:865:938/1
+TTCATGATACTACCATAAATGAGGTCTCGTTCCATGATATAGGGAGATGACGAATGCTTGCCAAGACAAATGAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:112:75:149/1
+CTTGAGTGAATCCATAACCGACCAATTCCCTGCACACGGGAAATCTTCACATATAGGACTTCCTTGGGATAAGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:113:39:113/1
+CGTAAAATAGCACGAACCCGAAACTTCCTGATATGTCTTGAGTGAATCCATAACCGACCAATTCCCTGCACACGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:114:175:251/1
+CATTACTTTGGGCGCCCCGTATACAGGGAATCACAGATCACCGTGCCTCCAGAGCCGCCCTGGGTTGCAGCTGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:115:787:860/1
+TTTACTAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCGCCCTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:116:206:281/1
+CACAGATCACCGTGCCTCCAGAGCCGCCCTGGGTTGCAGCTGGAACCAGCAAAGTAGGCCTATCACCGACCTCGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:117:509:584/1
+GCACCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:118:487:564/1
+CGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:119:515:590/1
+AACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAACGTAGGCTCGAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:120:593:669/1
+CCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:121:25:100/1
+GTTGCCGTAATTTGCGTAAAATAGCACGAACCCGAAACTTCCTGATATGTCTTGAGTGAATCCATAACCGACCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:122:111:186/1
+CGGGAAATCTTCACATATAGGACTTCCTTGGGATAAGATAATTGCTCCTATTGGGGTTACACGCCATTACTTTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:123:105:180/1
+TGCACACGGGAAATCTTCACATATAGGACTTCCTTGGGATAAGATAATTGCTCCTATTGGGGTTACACGCCATTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:124:870:945/1
+GATACTACCATAAATGAGGTCTCGTTCCATGATATAGGGAGATGACGAATGCTTGCCAAGACAAATGAATTTTCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:125:633:708/1
+TGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGAATGACCAGGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:126:378:452/1
+TTGATGACTCTATGATACTCACCGTCAGTACTTCCTCCTCAATGGTGACTGGTTGAAGTCCCTCGTGCCACCATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:127:585:659/1
+CGAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:128:25:99/1
+GTTGCCGTAATTTGCGTAAAATAGCACGAACCCGAAACTTCCTGATATGTCTTGAGTGAATCCATAACCGACCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:129:714:791/1
+GGGGGGTCCGTATACTCTCCCGTCTGTTAGGAATTATCTTGCCTGAAGCTAAGGCCACGGGATGCCCAACCCCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:130:554:628/1
+GCGCGTGGGAAAGCATGGAATAACGTAGGCTCGAACATCCCGACCACGTGGTAATTTTGGGCTCGGCTACTCCTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:131:65:140/1
+CCTGATATGTCTTGAGTGAATCCATAACCGACCAATTCCCTGCACACGGGAAATCTTCACATATAGGACTTCCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:132:849:925/1
+TTAGGCGCCCTTAGACTTCATGATACTACCATAAATGAGGTCTCGTTCCATGATATAGGGAGATGACGAATGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:133:894:970/1
+TTCCATGATATAGGGAGATGACGAATGCTTGCCAAGACAAATGAATTTTCTGGGATGAGGCAGCGGGTCCGGGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:134:357:431/1
+GTGTCGTCACGAGGACAATGTTTGATGACTCTATGATACTCACCGTCAGTACTTCCTCCTCAATGGTGACTGGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:135:19:93/1
+CGACAAGTTGCCGTAATTTGCGTAAAATAGCACGAACCCGAAACTTCCTGATATGTCTTGAGTGAATCCATAACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:136:616:691/1
+TCGGCTACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:137:622:694/1
+ACTCCTGGCTGTGGAGCACGTAATGGTACCACGGGTGTTACAAGTGAAGGGATGGTCCTCGCCTATCTAAGCTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:138:484:557/1
+GATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCATTAGCGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:139:874:949/1
+CTACCATAAATGAGGTCTCGTTCCATGATATAGGGAGATGACGAATGCTTGCCAAGACAAATGAATTTTCTGGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:140:346:421/1
+TGTGTAATCCGGTGTCGTCACGAGGACAATGTTTGATGACTCTATGATACTCACCGTCAGTACTTCCTCCTCAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:141:931:1005/1
+CAAATGAATTTTCTGGGATGAGGCAGCGGGTCCGGGAGTCACGCCAAACATTGAAAAACTCTCCGACAACAGTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:142:795:870/1
+ATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCGCCCTTAGACTTCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:143:476:552/1
+GTGAAATGGATCGCGAAGCGATGACCCATCATAGCACCGAACGCGCGAGCACCAACATTAGCCATATTAACGGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:144:776:850/1
+TGCCCAACCCCTTTACTAGATGTTAGATTAAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGAGACGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:145:805:879/1
+AAGCGGGTACCAGTATCATTTCAAGCAGGAACCCTAGGAGACGCTTAGGCGCCCTTAGACTTCATGATACTACCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:146:332:407/1
+CCGGTCCAGGGGCATGTGTAATCCGGTGTCGTCACGAGGACAATGTTTGATGACTCTATGATACTCACCGTCAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/tests/data/cluster_test_full_run_multiple_vars/reads_2.fq b/ariba/tests/data/cluster_test_full_run_multiple_vars/reads_2.fq
new file mode 100644
index 0000000..0b3734f
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_multiple_vars/reads_2.fq
@@ -0,0 +1,584 @@
+ at presence_absence1:1:768:842/2
+CGTCATCTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:2:395:471/2
+TTAATATGGCTAATGTTGGTGCTCGCGCGTTCGGTGCTATGATGGGTCATCGCTTCGCGATCCATTTCACTATTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:3:732:807/2
+TATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAATGATACTGGTACCCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:4:745:820/2
+ACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAATGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:5:245:320/2
+TATCATAGAGTCATCAAACATTGTCCTCGTGACGACACCGGATTACACATGCCCCTGGACCGGCCCCACTGTATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:6:280:354/2
+CAGTCACCATTGAGGAGGAAGTACTGACGGTGAGTATCATAGAGTCATCAAACATTGTCCTCGTGACGACACCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:7:617:690/2
+TAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCAGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:8:646:721/2
+TCTAGTAAAGGGGTTGGGCATCCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:9:239:312/2
+AGTCATCAAACATTGTCCTCGTGACGACACCGGATTACACATGCCCCTGGACCGGCCCCACTGTATTGAACAAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:10:770:844/2
+TTCGTCATCTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:11:198:273/2
+CATGCCCCTGGACCGGCCCCACTGTATTGAACAAGCCTCTGTTAAAACTACACGTAGGAAACGACTATCGAGGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:12:441:516/2
+TGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTAATGCCGTTAATATGGCTAATGTTGGTGCTCGCGCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:13:650:724/2
+ACATCTAGTAAAGGGGTTGGGCATCCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:14:528:601/2
+CATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:15:840:915/2
+GTTTTTCAATGTTTGGCGTGACTCCCGGACCCGCTGCCTCATCCCAGAAAATTCATTTGTCTTGGCAAGCATTCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:16:588:663/2
+GACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:17:536:610/2
+GGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:18:153:229/2
+AAACTACACGTAGGAAACGACTATCGAGGTCGGTGATAGGCCTACTTTGCTGGTTCCAGCTGCAACCCAGGGCGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:19:427:501/2
+TATTCCATGCTTTCCCACGCGCTAATGCCGTTAATATGGCTAATGTTGGTGCTCGCGCGTTCGGTGCTATGATGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:20:472:547/2
+AGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTAATGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:21:498:573/2
+CCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:22:760:836/2
+CTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:23:808:884/2
+CGCTGCCTCATCCCAGAAAATTCATTTGTCTTGGCAAGCATTCGTCATCTCCCTATATCATGGAACGAGACCTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:24:912:986/2
+CGAGCTCATTACCCAGCGGTGGTCGGCTAAGGTCAGGGATGAACAAGTGAGTTTTCAACTGTTGTCGGAGAGTTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:25:151:225/2
+TACACGTAGGAAACGACTATCGAGGTCGGTGATAGGCCTACTTTGCTGGTTCCAGCTGCAACCCAGGGCGGCTCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:26:322:398/2
+TTTAAGCGACCGCTTTTCAACATGGTGGCACGAGGGACTTCAACCAGTCACCATTGAGGAGGAAGTACTGACGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:27:790:866/2
+AATTCATTTGTCTTGGCAAGCATTCGTCATCTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATCATGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:28:642:715/2
+AAAGGGGTTGGGCATCCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:29:944:1018/2
+GCCATTTGTTTGCGTGCCAGGCCAGACAGTACCGAGCTCATTACCCAGCGGTGGTCGGCTAAGGTCAGGGATGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:30:358:432/2
+TGATGGGTCATCGCTTCGCGATCCATTTCACTATTTTAAGCGACCGCTTTTCAACATGGTGGCACGAGGGACTTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:31:626:700/2
+CCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:32:741:814/2
+CCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAATGATACTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:33:419:493/2
+GCTTTCCCACGCGCTAATGCCGTTAATATGGCTAATGTTGGTGCTCGCGCGTTCGGTGCTATGATGGGTCATCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:34:140:213/2
+ACGACTATCGAGGTCGGTGATAGGCCTACTTTGCTGGTTCCAGCTGCAACCCAGGGCGGCTCTGGAGGCACGGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:35:443:517/2
+ATGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTAATGCCGTTAATATGGCTAATGTTGGTGCTCGCGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:36:83:158/2
+GCGGCTCTGGAGGCACGGTGATCTGTGATTCCCTGTATACGGGGCGCCCAAAGTAATGGCGTGTAACCCCAATAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:37:472:548/2
+TAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTAATGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:38:512:586/2
+AACACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:39:637:709/2
+GTTGGGCATCCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:40:624:697/2
+GTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:41:454:528/2
+ACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTAATGCCGTTAATATGGCTAATGTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:42:417:492/2
+CTTTCCCACGCGCTAATGCCGTTAATATGGCTAATGTTGGTGCTCGCGCGTTCGGTGCTATGATGGGTCATCGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:43:928:1003/2
+GCCAGGCCAGACAGTACCGAGCTCATTACCCAGCGGTGGTCGGCTAAGGTCAGGGATGAACAAGTGAGTTTTCAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:44:214:287/2
+GACACCGGATTACACATGCCCCTGGACCGGCCCCACTGTATTGAACAAGCCTCTGTTAAAACTACACGTAGGAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:45:811:887/2
+ACCCGCTGCCTCATCCCAGAAAATTCATTTGTCTTGGCAAGCATTCGTCATCTCCCTATATCATGGAACGAGACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:46:492:567/2
+CGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:47:875:950/2
+GGATGAACAAGTGAGTTTTCAACTGTTGTCGGAGAGTTTTTCAATGTTTGGCGTGACTCCCGGACCCGCTGCCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:48:913:987/2
+CCGAGCTCATTACCCAGCGGTGGTCGGCTAAGGTCAGGGATGAACAAGTGAGTTTTCAACTGTTGTCGGAGAGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:49:721:796/2
+CATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAATGATACTGGTACCCGCTTAATCTAACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:50:90:163/2
+CCAGGGCGGCTCTGGAGGCACGGTGATCTGTGATTCCCTGTATACGGGGCGCCCAAAGTAATGGCGTGTAACCCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:51:113:187/2
+TACTTTGCTGGTTCCAGCTGCAACCCAGGGCGGCTCTGGAGGCACGGTGATCTGTGATTCCCTGTATACGGGGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:52:500:574/2
+ACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:53:512:584/2
+CACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:54:781:855/2
+CTTGGCAAGCATTCGTCATCTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:55:792:867/2
+AAATTCATTTGTCTTGGCAAGCATTCGTCATCTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATCATG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:56:713:786/2
+AAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAATGATACTGGTACCCGCTTAATCTAACATCTAGTAAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:57:600:676/2
+ATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCAGCTTAGATAGGCGAGGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:58:564:640/2
+CATGAATCGCCTGGTCATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:59:355:429/2
+TGGGTCATCGCTTCGCGATCCATTTCACTATTTTAAGCGACCGCTTTTCAACATGGTGGCACGAGGGACTTCAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:60:382:458/2
+TGTTGGTGCTCGCGCGTTCGGTGCTATGATGGGTCATCGCTTCGCGATCCATTTCACTATTTTAAGCGACCGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:61:658:731/2
+TAATCTAACATCTAGTAAAGGGGTTGGGCATCCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:62:849:923/2
+GTCGGAGAGTTTTTCAATGTTTGGCGTGACTCCCGGACCCGCTGCCTCATCCCAGAAAATTCATTTGTCTTGGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:63:623:697/2
+GTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:64:659:735/2
+CGCTTAATCTAACATCTAGTAAAGGGGTTGGGCATCCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:65:909:984/2
+AGCTCATTACCCAGCGGTGGTCGGCTAAGGTCAGGGATGAACAAGTGAGTTTTCAACTGTTGTCGGAGAGTTTTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:66:498:571/2
+ATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTATTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:67:223:298/2
+GTCCTCGTGACGACACCGGATTACACATGCCCCTGGACCGGCCCCACTGTATTGAACAAGCCTCTGTTAAAACTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:68:151:226/2
+CTACACGTAGGAAACGACTATCGAGGTCGGTGATAGGCCTACTTTGCTGGTTCCAGCTGCAACCCAGGGCGGCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:69:164:238/2
+CCTCTGTTAAAACTACACGTAGGAAACGACTATCGAGGTCGGTGATAGGCCTACTTTGCTGGTTCCAGCTGCAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:70:665:740/2
+GTACCCGCTTAATCTAACATCTAGTAAAGGGGTTGGGCATCCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:71:769:842/2
+CGTCATCTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:72:855:931/2
+CAACTGTTGTCGGAGAGTTTTTCAATGTTTGGCGTGACTCCCGGACCCGCTGCCTCATCCCAGAAAATTCATTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:73:621:694/2
+GCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:74:754:828/2
+ATCATGGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:75:542:616/2
+TAGATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:76:586:662/2
+ACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:77:463:536/2
+AAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTAATGCCGTTAATATGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:78:449:523/2
+GTCGGGATGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTAATGCCGTTAATATGGCTAATGTTGGTGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:79:537:612/2
+TAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:80:728:802/2
+TAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAATGATACTGGTACCCGCTTAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:81:550:624/2
+ATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCACAGCCAGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:82:749:824/2
+TGGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:83:560:635/2
+ATCGCCTGGTCATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:84:145:219/2
+TAGGAAACGACTATCGAGGTCGGTGATAGGCCTACTTTGCTGGTTCCAGCTGCAACCCAGGGCGGCTCTGGAGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:85:512:588/2
+GTAACACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:86:541:615/2
+AGATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:87:422:498/2
+TCCATGCTTTCCCACGCGCTAATGCCGTTAATATGGCTAATGTTGGTGCTCGCGCGTTCGGTGCTATGATGGGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:88:22:97/2
+TGTAACCCCAATAGGAGCAATTATCTTATCCCAAGGAAGTCCTATATGTGAAGATTTCCCGTGTGCAGGGAATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:89:819:895/2
+ACTCCCGGACCCGCTGCCTCATCCCAGAAAATTCATTTGTCTTGGCAAGCATTCGTCATCTCCCTATATCATGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:90:615:690/2
+TAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCAGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:91:559:633/2
+CGCCTGGTCATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:92:177:251/2
+TGTATTGAACAAGCCTCTGTTAAAACTACACGTAGGAAACGACTATCGAGGTCGGTGATAGGCCTACTTTGCTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:93:285:359/2
+TCAACCAGTCACCATTGAGGAGGAAGTACTGACGGTGAGTATCATAGAGTCATCAAACATTGTCCTCGTGACGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:94:393:470/2
+TAATATGGCTAATGTTGGTGCTCGCGCGTTCGGTGCTATGATGGGTCATCGCTTCGCGATCCATTTCACTATTTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:95:566:640/2
+CATGAATCGCCTGGTCATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:96:709:782/2
+GCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAATGATACTGGTACCCGCTTAATCTAACATCTAGTAAAGGGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:97:297:372/2
+GGCACGAGGGACTTCAACCAGTCACCATTGAGGAGGAAGTACTGACGGTGAGTATCATAGAGTCATCAAACATTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:98:364:439/2
+GGTGCTATGATGGGTCATCGCTTCGCGATCCATTTCACTATTTTAAGCGACCGCTTTTCAACATGGTGGCACGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:99:541:614/2
+GATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:100:196:270/2
+GCCCCTGGACCGGCCCCACTGTATTGAACAAGCCTCTGTTAAAACTACACGTAGGAAACGACTATCGAGGTCGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:101:750:823/2
+GGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:102:192:268/2
+CCCTGGACCGGCCCCACTGTATTGAACAAGCCTCTGTTAAAACTACACGTAGGAAACGACTATCGAGGTCGGTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:103:743:817/2
+AGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAATGATAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:104:302:376/2
+TGGTGGCACGAGGGACTTCAACCAGTCACCATTGAGGAGGAAGTACTGACGGTGAGTATCATAGAGTCATCAAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:105:456:530/2
+CCACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTAATGCCGTTAATATGGCTAATGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:106:931:1004/2
+TGCCAGGCCAGACAGTACCGAGCTCATTACCCAGCGGTGGTCGGCTAAGGTCAGGGATGAACAAGTGAGTTTTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:107:654:727/2
+CTAACATCTAGTAAAGGGGTTGGGCATCCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:108:260:334/2
+GTACTGACGGTGAGTATCATAGAGTCATCAAACATTGTCCTCGTGACGACACCGGATTACACATGCCCCTGGACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:109:468:542/2
+AGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTAATGCCGTTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:110:558:633/2
+CGCCTGGTCATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:111:865:938/2
+GAGTTTTCAACTGTTGTCGGAGAGTTTTTCAATGTTTGGCGTGACTCCCGGACCCGCTGCCTCATCCCAGAAAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:112:75:149/2
+GAGGCACGGTGATCTGTGATTCCCTGTATACGGGGCGCCCAAAGTAATGGCGTGTAACCCCAATAGGAGCAATTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:113:39:113/2
+GCCCAAAGTAATGGCGTGTAACCCCAATAGGAGCAATTATCTTATCCCAAGGAAGTCCTATATGTGAAGATTTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:114:175:251/2
+TGTATTGAACAAGCCTCTGTTAAAACTACACGTAGGAAACGACTATCGAGGTCGGTGATAGGCCTACTTTGCTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:115:787:860/2
+TTTGTCTTGGCAAGCATTCGTCATCTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:116:206:281/2
+GGATTACACATGCCCCTGGACCGGCCCCACTGTATTGAACAAGCCTCTGTTAAAACTACACGTAGGAAACGACTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:117:509:584/2
+CACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:118:487:564/2
+GCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATGCTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:119:515:590/2
+TTGTAACACCCGTGGTACCATTACGTGCTCCACAGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:120:593:669/2
+CTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCAGCTTAGATAGGCGAGGACCATCCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:121:25:100/2
+GCGTGTAACCCCAATAGGAGCAATTATCTTATCCCAAGGAAGTCCTATATGTGAAGATTTCCCGTGTGCAGGGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:122:111:186/2
+ACTTTGCTGGTTCCAGCTGCAACCCAGGGCGGCTCTGGAGGCACGGTGATCTGTGATTCCCTGTATACGGGGCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:123:105:180/2
+CTGGTTCCAGCTGCAACCCAGGGCGGCTCTGGAGGCACGGTGATCTGTGATTCCCTGTATACGGGGCGCCCAAAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:124:870:945/2
+AACAAGTGAGTTTTCAACTGTTGTCGGAGAGTTTTTCAATGTTTGGCGTGACTCCCGGACCCGCTGCCTCATCCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:125:633:708/2
+TTGGGCATCCCGTGGCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:126:378:452/2
+TGCTCGCGCGTTCGGTGCTATGATGGGTCATCGCTTCGCGATCCATTTCACTATTTTAAGCGACCGCTTTTCAAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:127:585:659/2
+GGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTTGTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:128:25:99/2
+CGTGTAACCCCAATAGGAGCAATTATCTTATCCCAAGGAAGTCCTATATGTGAAGATTTCCCGTGTGCAGGGAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:129:714:791/2
+AGTCTAAGGGCGCCTAAGCGTCTCCTAGGGTTCCTGCTTGAAATGATACTGGTACCCGCTTAATCTAACATCTAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:130:554:628/2
+GGTCATTCAGCTTAGATAGGCGAGGACCATCCCTTCACTTGTAACACCCGTGGTACCATTACGTGCTCCACAGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:131:65:140/2
+TGATCTGTGATTCCCTGTATACGGGGCGCCCAAAGTAATGGCGTGTAACCCCAATAGGAGCAATTATCTTATCCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:132:849:925/2
+TTGTCGGAGAGTTTTTCAATGTTTGGCGTGACTCCCGGACCCGCTGCCTCATCCCAGAAAATTCATTTGTCTTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:133:894:970/2
+CGGTGGTCGGCTAAGGTCAGGGATGAACAAGTGAGTTTTCAACTGTTGTCGGAGAGTTTTTCAATGTTTGGCGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:134:357:431/2
+GATGGGTCATCGCTTCGCGATCCATTTCACTATTTTAAGCGACCGCTTTTCAACATGGTGGCACGAGGGACTTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:135:19:93/2
+ACCCCAATAGGAGCAATTATCTTATCCCAAGGAAGTCCTATATGTGAAGATTTCCCGTGTGCAGGGAATTGGTCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:136:616:691/2
+TTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCAGCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:137:622:694/2
+GCCTTAGCTTCAGGCAAGATAATTCCTAACAGACGGGAGAGTATACGGACCCCCCATGAATCGCCTGGTCATTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:138:484:557/2
+AGCCAGGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATGCTTTCCCACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:139:874:949/2
+GATGAACAAGTGAGTTTTCAACTGTTGTCGGAGAGTTTTTCAATGTTTGGCGTGACTCCCGGACCCGCTGCCTCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:140:346:421/2
+CGCTTCGCGATCCATTTCACTATTTTAAGCGACCGCTTTTCAACATGGTGGCACGAGGGACTTCAACCAGTCACC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:141:931:1005/2
+GTGCCAGGCCAGACAGTACCGAGCTCATTACCCAGCGGTGGTCGGCTAAGGTCAGGGATGAACAAGTGAGTTTTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:142:795:870/2
+AGAAAATTCATTTGTCTTGGCAAGCATTCGTCATCTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:143:476:552/2
+GGAGTAGCCGAGCCCAAAATTACCACGTGGTCGGGATGTTCGAGCCTACGTTATTCCATGCTTTCCCACGCGCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:144:776:850/2
+CAAGCATTCGTCATCTCCCTATATCATGGAACGAGACCTCATTTATGGTAGTATCATGAAGTCTAAGGGCGCCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:145:805:879/2
+CCTCATCCCAGAAAATTCATTTGTCTTGGCAAGCATTCGTCATCTCCCTATATCATGGAACGAGACCTCATTTAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at presence_absence1:146:332:407/2
+TTTCACTATTTTAAGCGACCGCTTTTCAACATGGTGGCACGAGGGACTTCAACCAGTCACCATTGAGGAGGAAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/tests/data/cluster_test_full_run_multiple_vars/references.fa b/ariba/tests/data/cluster_test_full_run_multiple_vars/references.fa
new file mode 100644
index 0000000..c7a01d8
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_multiple_vars/references.fa
@@ -0,0 +1,5 @@
+>presence_absence1
+ATGGATCGCGAAGCGATGACCCATGAAGCGACCGAACGCGCGAGCACCAACATTAGCCAT
+ATTAACGGCATTAGCGCGTGGGAAAGCATGGAATAA
+>presence_absence2
+ATGGCGTGCGATGAATTTGGCCATATTAAACTGATGAACCCGCAGCGCAGCACCTAA
diff --git a/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa b/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa
index 3278f9e..2a9253d 100644
--- a/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa
+++ b/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa
@@ -1,6 +1,6 @@
>noncoding1
CGTACGCGGGTGGAGACATGTACTCCACTCCCATACATCCCTAAGTTTGTCCCTAAGGCA
-GTGCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTAGGGAACGCCATCCGAGATCAC
+GTGCCCGCCGCCCACGAACGAATGCGGTGAGATGCTTAGGGAACGCCTATCCGAGATCAC
>noncoding2
TCTTTAACTGTTCACGACTGTATCGCGGCTTGCAAATCTTAAGTTCTTCCCAAGCGCGCT
GCGATACAAATCCCAAGTTTAGCGGACAGTTCACGCCGGGTTCTAAGAATGTATGCGTCC
diff --git a/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.in.fa
similarity index 77%
copy from ariba/tests/data/cluster_test_full_run_ok_non_coding.fa
copy to ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.in.fa
index 3278f9e..2a9253d 100644
--- a/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa
+++ b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.in.fa
@@ -1,6 +1,6 @@
>noncoding1
CGTACGCGGGTGGAGACATGTACTCCACTCCCATACATCCCTAAGTTTGTCCCTAAGGCA
-GTGCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTAGGGAACGCCATCCGAGATCAC
+GTGCCCGCCGCCCACGAACGAATGCGGTGAGATGCTTAGGGAACGCCTATCCGAGATCAC
>noncoding2
TCTTTAACTGTTCACGACTGTATCGCGGCTTGCAAATCTTAAGTTCTTCCCAAGCGCGCT
GCGATACAAATCCCAAGTTTAGCGGACAGTTCACGCCGGGTTCTAAGAATGTATGCGTCC
diff --git a/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.in.tsv b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.in.tsv
new file mode 100644
index 0000000..ab4e2bd
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.in.tsv
@@ -0,0 +1,6 @@
+noncoding1 0 0 . . generic description of noncoding1
+noncoding1 0 0 A6G . variant in ref and reads so should report
+noncoding1 0 0 G9T . wild type in ref and reads
+noncoding1 0 0 A14T . ref has wild type, reads has variant so should report
+noncoding1 0 0 A40C . ref has variant, reads has wild type
+noncoding2 0 0 . . generic description of noncoding2
diff --git a/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.mash.fa
similarity index 50%
copy from ariba/tests/data/cluster_test_full_run_ok_non_coding.fa
copy to ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.mash.fa
index 3278f9e..efcabae 100644
--- a/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa
+++ b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.mash.fa
@@ -1,6 +1,9 @@
->noncoding1
+>noncoding1_closest
CGTACGCGGGTGGAGACATGTACTCCACTCCCATACATCCCTAAGTTTGTCCCTAAGGCA
-GTGCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTAGGGAACGCCATCCGAGATCAC
+GTGCCCGCCGCCCACGAACGAATGCGGTGAGATGCTTAGGGAACGCCTATCCGAGATCAC
+>noncoding1
+CGTACGCGGGTAGAGACATGTACTCCACTCACATACATCCCTAAGTTTGTCCCTAAGGCA
+GTGCCCGCCGCCCACGAACGAATGCGCTGAGATGCTTAGTGAACGCCTATCCGAGATCAC
>noncoding2
TCTTTAACTGTTCACGACTGTATCGCGGCTTGCAAATCTTAAGTTCTTCCCAAGCGCGCT
GCGATACAAATCCCAAGTTTAGCGGACAGTTCACGCCGGGTTCTAAGAATGTATGCGTCC
diff --git a/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.mash.fa.msh b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.mash.fa.msh
new file mode 100644
index 0000000..37f5b75
Binary files /dev/null and b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster.mash.fa.msh differ
diff --git a/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster/reads_1.fq b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster/reads_1.fq
new file mode 100644
index 0000000..4356e47
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster/reads_1.fq
@@ -0,0 +1,144 @@
+ at noncoding1:1:77:136/1
+CATGTACTCCACTCCCATACATCACTAAGTTTGTCCCTAAGGCATTGCCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:2:48:107/1
+CTGAGTGAAGCGACGTACGCGGGTGGTGACATGTACTCCACTCCCATACA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:3:98:159/1
+TCACTAAGTTTGTCCCTAAGGCATTGCCCGCCGCCCACGAACGAACTGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:4:126:185/1
+CGCCGCCCACGAACGAACTGCGGTGAGATGCTTAGGGAACGCCATCCGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:5:26:85/1
+CGTAGCGTACTGAGTCTACTGACTGAGTGAAGCGACGTACGCGGGTGGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:6:85:145/1
+CCACTCCCATACATCACTAAGTTTGTCCCTAAGGCATTGCCCGCCGCCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:7:53:112/1
+TGAAGCGACGTACGCGGGTGGTGACATGTACTCCACTCCCATACATCACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:8:110:170/1
+TCCCTAAGGCATTGCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:9:73:132/1
+GTGACATGTACTCCACTCCCATACATCACTAAGTTTGTCCCTAAGGCATT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:10:51:110/1
+AGTGAAGCGACGTACGCGGGTGGTGACATGTACTCCACTCCCATACATCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:11:123:183/1
+GCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTAGGGAACGCCATCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:12:63:122/1
+TACGCGGGTGGTGACATGTACTCCACTCCCATACATCACTAAGTTTGTCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:13:91:150/1
+CCATACATCACTAAGTTTGTCCCTAAGGCATTGCCCGCCGCCCACGAACG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:14:7:68/1
+GACTTGACGATCGTACGTACGTAGCGTACTGAGTCTACTGACTGAGTGAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:15:104:163/1
+AGTTTGTCCCTAAGGCATTGCCCGCCGCCCACGAACGAACTGCGGTGAGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:16:1:60/1
+CGTATCGACTTGACGATCGTACGTACGTAGCGTACTGAGTCTACTGACTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:17:64:123/1
+ACGCGGGTGGTGACATGTACTCCACTCCCATACATCACTAAGTTTGTCCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:18:128:185/1
+CCGCCCACGAACGAACTGCGGTGAGATGCTTAGGGAACGCCATCCGAGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:19:28:88/1
+TAGCGTACTGAGTCTACTGACTGAGTGAAGCGACGTACGCGGGTGGTGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:20:97:157/1
+ATCACTAAGTTTGTCCCTAAGGCATTGCCCGCCGCCCACGAACGAACTGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:21:22:81/1
+CGTACGTAGCGTACTGAGTCTACTGACTGAGTGAAGCGACGTACGCGGGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:22:95:155/1
+ACATCACTAAGTTTGTCCCTAAGGCATTGCCCGCCGCCCACGAACGAACT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:23:119:176/1
+CATTGCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTAGGGAACGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:24:110:169/1
+TCCCTAAGGCATTGCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:25:110:170.dup.2/1
+TCCCTAAGGCATTGCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:26:57:117/1
+GCGACGTACGCGGGTGGTGACATGTACTCCACTCCCATACATCACTAAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:27:41:100/1
+CTACTGACTGAGTGAAGCGACGTACGCGGGTGGTGACATGTACTCCACTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:28:18:78/1
+CGTACGTACGTAGCGTACTGAGTCTACTGACTGAGTGAAGCGACGTACGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:29:6:65/1
+CGACTTGACGATCGTACGTACGTAGCGTACTGAGTCTACTGACTGAGTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:30:3:63/1
+TATCGACTTGACGATCGTACGTACGTAGCGTACTGAGTCTACTGACTGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:31:66:124/1
+GCGGGTGGTGACATGTACTCCACTCCCATACATCACTAAGTTTGTCCCTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:32:62:122/1
+GTACGCGGGTGGTGACATGTACTCCACTCCCATACATCACTAAGTTTGTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:33:32:91/1
+GTACTGAGTCTACTGACTGAGTGAAGCGACGTACGCGGGTGGTGACATGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:34:28:86/1
+TAGCGTACTGAGTCTACTGACTGAGTGAAGCGACGTACGCGGGTGGTGAC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:35:3:64/1
+TATCGACTTGACGATCGTACGTACGTAGCGTACTGAGTCTACTGACTGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:36:120:181/1
+ATTGCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTAGGGAACGCCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster/reads_2.fq b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster/reads_2.fq
new file mode 100644
index 0000000..b3ba738
--- /dev/null
+++ b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster/reads_2.fq
@@ -0,0 +1,144 @@
+ at noncoding1:1:77:136/2
+TGAATGTGATCTCGGATGGCGTTCCCTAAGCATCTCACCGCAGTTCGTTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:2:48:107/2
+GCATCTCACCGCAGTTCGTTCGTGGGCGGCGGGCAATGCCTTAGGGACAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:3:98:159/2
+CGTCGCTGATAGCTGCTCGCTCGTGAATGTGATCTCGGATGGCGTTCCCT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:4:126:185/2
+AGATCCGCGCGAGAGTATATATCGCTCGTCGCTGATAGCTGCTCGCTCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:5:26:85/2
+TGGGCGGCGGGCAATGCCTTAGGGACAAACTTAGTGATGTATGGGAGTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:6:85:145/2
+GCTCGCTCGTGAATGTGATCTCGGATGGCGTTCCCTAAGCATCTCACCGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:7:53:112/2
+CCTAAGCATCTCACCGCAGTTCGTTCGTGGGCGGCGGGCAATGCCTTAGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:8:110:170/2
+TATATATCGCTCGTCGCTGATAGCTGCTCGCTCGTGAATGTGATCTCGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:9:73:132/2
+TGTGATCTCGGATGGCGTTCCCTAAGCATCTCACCGCAGTTCGTTCGTGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:10:51:110/2
+TAAGCATCTCACCGCAGTTCGTTCGTGGGCGGCGGGCAATGCCTTAGGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:11:123:183/2
+ATCCGCGCGAGAGTATATATCGCTCGTCGCTGATAGCTGCTCGCTCGTGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:12:63:122/2
+GATGGCGTTCCCTAAGCATCTCACCGCAGTTCGTTCGTGGGCGGCGGGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:13:91:150/2
+TAGCTGCTCGCTCGTGAATGTGATCTCGGATGGCGTTCCCTAAGCATCTC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:14:7:68/2
+CTTAGGGACAAACTTAGTGATGTATGGGAGTGGAGTACATGTCACCACCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:15:104:163/2
+CGCTCGTCGCTGATAGCTGCTCGCTCGTGAATGTGATCTCGGATGGCGTT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:16:1:60/2
+CAAACTTAGTGATGTATGGGAGTGGAGTACATGTCACCACCCGCGTACGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:17:64:123/2
+GGATGGCGTTCCCTAAGCATCTCACCGCAGTTCGTTCGTGGGCGGCGGGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:18:128:185/2
+AGATCCGCGCGAGAGTATATATCGCTCGTCGCTGATAGCTGCTCGCTCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:19:28:88/2
+TCGTGGGCGGCGGGCAATGCCTTAGGGACAAACTTAGTGATGTATGGGAG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:20:97:157/2
+TCGCTGATAGCTGCTCGCTCGTGAATGTGATCTCGGATGGCGTTCCCTAA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:21:22:81/2
+CGGCGGGCAATGCCTTAGGGACAAACTTAGTGATGTATGGGAGTGGAGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:22:95:155/2
+GCTGATAGCTGCTCGCTCGTGAATGTGATCTCGGATGGCGTTCCCTAAGC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:23:119:176/2
+CGAGAGTATATATCGCTCGTCGCTGATAGCTGCTCGCTCGTGAATGTGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:24:110:169/2
+ATATATCGCTCGTCGCTGATAGCTGCTCGCTCGTGAATGTGATCTCGGAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:25:110:170.dup.2/2
+TATATATCGCTCGTCGCTGATAGCTGCTCGCTCGTGAATGTGATCTCGGA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:26:57:117/2
+CGTTCCCTAAGCATCTCACCGCAGTTCGTTCGTGGGCGGCGGGCAATGCC
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:27:41:100/2
+ACCGCAGTTCGTTCGTGGGCGGCGGGCAATGCCTTAGGGACAAACTTAGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:28:18:78/2
+CGGGCAATGCCTTAGGGACAAACTTAGTGATGTATGGGAGTGGAGTACAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:29:6:65/2
+AGGGACAAACTTAGTGATGTATGGGAGTGGAGTACATGTCACCACCCGCG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:30:3:63/2
+GGACAAACTTAGTGATGTATGGGAGTGGAGTACATGTCACCACCCGCGTA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:31:66:124/2
+CGGATGGCGTTCCCTAAGCATCTCACCGCAGTTCGTTCGTGGGCGGCGGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:32:62:122/2
+GATGGCGTTCCCTAAGCATCTCACCGCAGTTCGTTCGTGGGCGGCGGGCA
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:33:32:91/2
+CGTTCGTGGGCGGCGGGCAATGCCTTAGGGACAAACTTAGTGATGTATGG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:34:28:86/2
+GTGGGCGGCGGGCAATGCCTTAGGGACAAACTTAGTGATGTATGGGAGTG
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:35:3:64/2
+GGGACAAACTTAGTGATGTATGGGAGTGGAGTACATGTCACCACCCGCGT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
+ at noncoding1:36:120:181/2
+CCGCGCGAGAGTATATATCGCTCGTCGCTGATAGCTGCTCGCTCGTGAAT
++
+IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII
diff --git a/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster/references.fa
similarity index 77%
copy from ariba/tests/data/cluster_test_full_run_ok_non_coding.fa
copy to ariba/tests/data/cluster_test_full_run_ref_not_in_cluster/references.fa
index 3278f9e..2a9253d 100644
--- a/ariba/tests/data/cluster_test_full_run_ok_non_coding.fa
+++ b/ariba/tests/data/cluster_test_full_run_ref_not_in_cluster/references.fa
@@ -1,6 +1,6 @@
>noncoding1
CGTACGCGGGTGGAGACATGTACTCCACTCCCATACATCCCTAAGTTTGTCCCTAAGGCA
-GTGCCCGCCGCCCACGAACGAACTGCGGTGAGATGCTTAGGGAACGCCATCCGAGATCAC
+GTGCCCGCCGCCCACGAACGAATGCGGTGAGATGCTTAGGGAACGCCTATCCGAGATCAC
>noncoding2
TCTTTAACTGTTCACGACTGTATCGCGGCTTGCAAATCTTAAGTTCTTCCCAAGCGCGCT
GCGATACAAATCCCAAGTTTAGCGGACAGTTCACGCCGGGTTCTAAGAATGTATGCGTCC
diff --git a/ariba/tests/data/clusters_minimap_reads_to_all_refs.out.clstr_count b/ariba/tests/data/clusters_minimap_reads_to_all_refs.out.clstr_count
index 278d834..dc569d7 100644
--- a/ariba/tests/data/clusters_minimap_reads_to_all_refs.out.clstr_count
+++ b/ariba/tests/data/clusters_minimap_reads_to_all_refs.out.clstr_count
@@ -1,2 +1,2 @@
-cluster1 1628 123728
-cluster2 1952 148352
+cluster1 1624 123424
+cluster2 1946 147896
diff --git a/ariba/tests/data/clusters_minimap_reads_to_all_refs.out.hist b/ariba/tests/data/clusters_minimap_reads_to_all_refs.out.hist
index dec7abd..de6fe18 100644
--- a/ariba/tests/data/clusters_minimap_reads_to_all_refs.out.hist
+++ b/ariba/tests/data/clusters_minimap_reads_to_all_refs.out.hist
@@ -1,31 +1,26 @@
-191 1
-194 3
-196 4
-201 3
-202 3
-203 2
-204 4
-209 8
-210 3
+194 2
+204 1
+209 5
+210 2
211 1
-212 19
-213 30
-214 43
+212 15
+213 29
+214 42
215 58
-216 83
+216 80
217 100
-218 116
+218 114
219 123
-220 151
-221 170
-222 128
+220 150
+221 168
+222 127
223 144
-224 108
-225 131
+224 105
+225 128
226 103
-227 79
+227 78
228 79
-229 45
+229 44
230 22
231 16
232 1
diff --git a/ariba/tests/data/clusters_test_dummy_db.fa.msh b/ariba/tests/data/clusters_test_dummy_db.fa.msh
new file mode 100644
index 0000000..a208432
Binary files /dev/null and b/ariba/tests/data/clusters_test_dummy_db.fa.msh differ
diff --git a/ariba/tests/data/clusters_test_write_report.tsv b/ariba/tests/data/clusters_test_write_report.tsv
index 9851c6a..c348621 100644
--- a/ariba/tests/data/clusters_test_write_report.tsv
+++ b/ariba/tests/data/clusters_test_write_report.tsv
@@ -1,3 +1,3 @@
-#ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
+#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
gene1 line1
gene2 line2
diff --git a/ariba/tests/data/reference_data_init_ok.rename.tsv b/ariba/tests/data/reference_data_init_ok.rename.tsv
new file mode 100644
index 0000000..7f75bb0
--- /dev/null
+++ b/ariba/tests/data/reference_data_init_ok.rename.tsv
@@ -0,0 +1,2 @@
+original_gene1 gene1
+original_gene2 gene2
diff --git a/ariba/tests/data/reference_data_load_rename_file.tsv b/ariba/tests/data/reference_data_load_rename_file.tsv
new file mode 100644
index 0000000..ad2a3e0
--- /dev/null
+++ b/ariba/tests/data/reference_data_load_rename_file.tsv
@@ -0,0 +1,2 @@
+original1 ariba1
+original2 ariba2
diff --git a/ariba/tests/data/report_filter_test_init_bad.tsv b/ariba/tests/data/report_filter_test_init_bad.tsv
index f93b0f5..b29210d 100644
--- a/ariba/tests/data/report_filter_test_init_bad.tsv
+++ b/ariba/tests/data/report_filter_test_init_bad.tsv
@@ -1,4 +1,4 @@
-#ef_name ref_type flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 1 SNP n C42T 0 . . 42 42 C 142 142 C 500 . 500 a:n:C42T:id1:foo free_text
-cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 . 542 a:n:A51G:id2:bar free_text2
-cluster2 variants_only 179 20000 cluster2 1042 1042 42.42 cluster2.scaffold.1 1442 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 . 290 a:n:I42L:id3:baz free_text3
+#ariba_ref_name ref_name ref_type flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+ariba_cluster1 cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 1 SNP n C42T 0 . . 42 42 C 142 142 C 500 C 500 a:n:C42T:id1:foo free_text
+ariba_cluster1 cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 C 542 a:n:A51G:id2:bar free_text2
+ariba_cluster2 cluster2 variants_only 179 20000 cluster2 1042 1042 42.42 cluster2.scaffold.1 1442 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 T 290 a:n:I42L:id3:baz free_text3
diff --git a/ariba/tests/data/report_filter_test_init_good.tsv b/ariba/tests/data/report_filter_test_init_good.tsv
index 2d67b83..4209e31 100644
--- a/ariba/tests/data/report_filter_test_init_good.tsv
+++ b/ariba/tests/data/report_filter_test_init_good.tsv
@@ -1,5 +1,5 @@
-#ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 10.5 1 SNP n C42T 0 . . 42 42 C 142 142 C 500 . 500 a:n:C42T:id1:foo free_text
-cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 10.5 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 . 542 a:n:A51G:id2:bar free_text2
-cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.2 1300 12.4 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 . 542 a:n:A51G:id3:spam free_text3
-cluster2 1 0 179 20000 cluster2 1042 1042 42.42 cluster2.scaffold.1 1442 20.2 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 . 290 a:v:I42L:id4:eggs free_text3
+#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+ariba_cluster1 cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 10.5 1 SNP n C42T 0 . . 42 42 C 142 142 C 500 C 500 a:n:C42T:id1:foo free_text
+ariba_cluster1 cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 10.5 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 C 542 a:n:A51G:id2:bar free_text2
+ariba_cluster1 cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.2 1300 12.4 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 C 542 a:n:A51G:id3:spam free_text3
+ariba_cluster2 cluster2 1 0 179 20000 cluster2 1042 1042 42.42 cluster2.scaffold.1 1442 20.2 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 T 290 a:v:I42L:id4:eggs free_text3
diff --git a/ariba/tests/data/report_filter_test_load_report_bad.tsv b/ariba/tests/data/report_filter_test_load_report_bad.tsv
index 553e60f..09c1820 100644
--- a/ariba/tests/data/report_filter_test_load_report_bad.tsv
+++ b/ariba/tests/data/report_filter_test_load_report_bad.tsv
@@ -1,4 +1,4 @@
-#ef_name ref_type flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 1 SNP n C42T 0 . . 42 42 C 142 142 C 500 . 500 a:n:C42T:id1:foo free_text
-cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 . 542 a:n:A51G:id1:bar free_text2
-cluster2 variants_only 179 20000 cluster2 1042 1042 42.42 cluster2.scaffold.1 1442 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 . 290 a:n:I42L:id1:foo free_text3
+#ariba_ref_name ref_name ref_type flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+ariba_cluster1 cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 1 SNP n C42T 0 . . 42 42 C 142 142 C 500 C 500 a:n:C42T:id1:foo free_text
+ariba_cluster1 cluster1 non_coding 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 C 542 a:n:A51G:id1:bar free_text2
+ariba_cluster2 cluster2 variants_only 179 20000 cluster2 1042 1042 42.42 cluster2.scaffold.1 1442 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 T 290 a:n:I42L:id1:foo free_text3
diff --git a/ariba/tests/data/report_filter_test_load_report_good.tsv b/ariba/tests/data/report_filter_test_load_report_good.tsv
index 704b716..9897eb7 100644
--- a/ariba/tests/data/report_filter_test_load_report_good.tsv
+++ b/ariba/tests/data/report_filter_test_load_report_good.tsv
@@ -1,5 +1,5 @@
-#ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 12.2 1 SNP n C42T 0 . . 42 42 C 142 142 C 500 . 500 a:n:C42T:id1:foo free_text
-cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 12.2 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 . 542 a:n:A51G:id2:bar free_text2
-cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.2 1300 22.2 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 . 542 a:n:A51G:id3:spam free_text3
-cluster2 1 1 179 20000 cluster2 1042 1042 42.42 cluster2.scaffold.1 1442 33.3 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 . 290 a:v:I42L:id4:eggs free_text3
+#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+ariba_cluster1 cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 12.2 1 SNP n C42T 0 . . 42 42 C 142 142 C 500 C 500 a:n:C42T:id1:foo free_text
+ariba_cluster1 cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 12.2 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 C 542 a:n:A51G:id2:bar free_text2
+ariba_cluster1 cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.2 1300 22.2 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 C 542 a:n:A51G:id3:spam free_text3
+ariba_cluster2 cluster2 1 1 179 20000 cluster2 1042 1042 42.42 cluster2.scaffold.1 1442 33.3 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 T 290 a:v:I42L:id4:eggs free_text3
diff --git a/ariba/tests/data/report_filter_test_run.expected.tsv b/ariba/tests/data/report_filter_test_run.expected.tsv
index 1bd0ab8..f179170 100644
--- a/ariba/tests/data/report_filter_test_run.expected.tsv
+++ b/ariba/tests/data/report_filter_test_run.expected.tsv
@@ -1,6 +1,6 @@
-#ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 12.4 1 SNP n A51G 1 . . 51 51 C 151 151 C 542 . 542 a:n:A51G:id2:bar free_text2
-cluster2 1 1 179 20000 cluster2 1042 1042 99.0 cluster2.scaffold.1 1442 13.5 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 . 290 a:n:I42L:id3:baz free_text3
-cluster4 1 1 179 20000 cluster4 1042 1042 99.0 cluster4.scaffold.1 1442 14.6 . . . . . . . . . . . . . . . . . free_text3
-cluster5 1 0 528 1874 cluster5 1188 1097 92.43 cluster5.scaffold.1 2218 20.0 0 . p . 0 E89G NONSYN 65 265 A;A 766 766 G;C 88;90 .;. 87;90 . .'
-cluster5 1 0 528 1874 cluster5 1188 1097 92.43 cluster5.scaffold.1 2218 20.0 0 . p . 0 Q37fs FSHIFT 109 109 A 634 634 . 67 . 67 . .
+#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+ariba_cluster1 cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 12.4 1 SNP n A51G 1 . . 51 51 C 151 151 C 542 C 542 a:n:A51G:id2:bar free_text2
+ariba_cluster2 cluster2 1 1 179 20000 cluster2 1042 1042 99.0 cluster2.scaffold.1 1442 13.5 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 T 290 a:n:I42L:id3:baz free_text3
+ariba_cluster4 cluster4 1 1 179 20000 cluster4 1042 1042 99.0 cluster4.scaffold.1 1442 14.6 . . . . . . . . . . . . . . . . . free_text3
+ariba_cluster5 cluster5 1 0 528 1874 cluster5 1188 1097 92.43 cluster5.scaffold.1 2218 20.0 0 . p . 0 E89G NONSYN 65 265 A;A 766 766 G;C 88;90 G;C 87;90 . .'
+ariba_cluster5 cluster5 1 0 528 1874 cluster5 1188 1097 92.43 cluster5.scaffold.1 2218 20.0 0 . p . 0 Q37fs FSHIFT 109 109 A 634 634 . 67 . 67 . .
diff --git a/ariba/tests/data/report_filter_test_run.in.tsv b/ariba/tests/data/report_filter_test_run.in.tsv
index b6e46a4..08d3d93 100644
--- a/ariba/tests/data/report_filter_test_run.in.tsv
+++ b/ariba/tests/data/report_filter_test_run.in.tsv
@@ -1,9 +1,9 @@
-#ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-cluster1 0 0 27 10000 cluster1 1000 0 99.42 cluster1.scaffold.1 1300 12.4 1 SNP n C42T 0 . . 42 42 C 142 142 C 500 . 500 a:n:C42T:id1:foo free_text
-cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 12.4 1 SNP n A51G 1 . . 51 51 C 151 151 C 542 . 542 a:n:A51G:id2:bar free_text2
-cluster2 1 1 179 20000 cluster2 1042 1042 99.0 cluster2.scaffold.1 1442 13.5 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 . 290 a:n:I42L:id3:baz free_text3
-cluster3 1 1 179 20000 cluster3 1042 1042 89.0 cluster2.scaffold.1 1442 13.5 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 . 290 a:n:I42L:id4:spam free_text3
-cluster4 1 1 179 20000 cluster4 1042 1042 99.0 cluster4.scaffold.1 1442 14.6 1 SNP p I42L 1 I42L SYN 112 112 C 442 442 T 300 . 290 a:n:I42L:id5:eggs free_text3
-cluster5 1 0 528 1874 cluster5 1188 1097 92.43 cluster5.scaffold.1 2218 20.0 0 . p . 0 E89G NONSYN 65 265 A;A 766 766 G;C 88;90 .;. 87;90 . .'
-cluster5 1 0 528 1874 cluster5 1188 1097 92.43 cluster5.scaffold.1 2218 20.0 0 . p . 0 Q37fs FSHIFT 109 109 A 634 634 . 67 . 67 . .
-cluster5 1 0 528 1874 cluster5 1188 1097 92.43 cluster5.scaffold.1 2218 20.0 0 . p . 0 E89G NONSYN 265 265 A;A 766 766 G;C 88;90 .;. 87;90 . .
+#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+ariba_cluster1 cluster1 0 0 27 10000 cluster1 1000 0 99.42 cluster1.scaffold.1 1300 12.4 1 SNP n C42T 0 . . 42 42 C 142 142 C 500 C 500 a:n:C42T:id1:foo free_text
+ariba_cluster1 cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 12.4 1 SNP n A51G 1 . . 51 51 C 151 151 C 542 C 542 a:n:A51G:id2:bar free_text2
+ariba_cluster2 cluster2 1 1 179 20000 cluster2 1042 1042 99.0 cluster2.scaffold.1 1442 13.5 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 T 290 a:n:I42L:id3:baz free_text3
+ariba_cluster3 cluster3 1 1 179 20000 cluster3 1042 1042 89.0 cluster2.scaffold.1 1442 13.5 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 T 290 a:n:I42L:id4:spam free_text3
+ariba_cluster4 cluster4 1 1 179 20000 cluster4 1042 1042 99.0 cluster4.scaffold.1 1442 14.6 1 SNP p I42L 1 I42L SYN 112 112 C 442 442 T 300 T 290 a:n:I42L:id5:eggs free_text3
+ariba_cluster5 cluster5 1 0 528 1874 cluster5 1188 1097 92.43 cluster5.scaffold.1 2218 20.0 0 . p . 0 E89G NONSYN 65 265 A;A 766 766 G;C 88;90 G;C 87;90 . .'
+ariba_cluster5 cluster5 1 0 528 1874 cluster5 1188 1097 92.43 cluster5.scaffold.1 2218 20.0 0 . p . 0 Q37fs FSHIFT 109 109 A 634 634 . 67 . 67 . .
+ariba_cluster5 cluster5 1 0 528 1874 cluster5 1188 1097 92.43 cluster5.scaffold.1 2218 20.0 0 . p . 0 E89G NONSYN 265 265 A;A 766 766 G;C 88;90 G;C 87;90 . .
diff --git a/ariba/tests/data/report_filter_test_write_report.tsv b/ariba/tests/data/report_filter_test_write_report.tsv
index 1cafa7b..6157897 100644
--- a/ariba/tests/data/report_filter_test_write_report.tsv
+++ b/ariba/tests/data/report_filter_test_write_report.tsv
@@ -1,4 +1,4 @@
-#ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 42.4 1 SNP n C42T 0 . . 42 42 C 142 142 C 500 . 500 a:n:C42T:id1:foo free_text
-cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 42.4 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 . 542 a:n:A51G:id2:bar free_text2
-cluster2 1 1 179 20000 cluster2 1042 1042 42.42 cluster2.scaffold.1 1442 42.4 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 . 290 a:v:I42L:id3:baz free_text3
+#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+ariba_cluster1 cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 42.4 1 SNP n C42T 0 . . 42 42 C 142 142 C 500 C 500 a:n:C42T:id1:foo free_text
+ariba_cluster1 cluster1 0 0 27 10000 cluster1 1000 999 99.42 cluster1.scaffold.1 1300 42.4 1 SNP n A51G 0 . . 51 51 C 151 151 C 542 C 542 a:n:A51G:id2:bar free_text2
+ariba_cluster2 cluster2 1 1 179 20000 cluster2 1042 1042 42.42 cluster2.scaffold.1 1442 42.4 1 SNP p I42L 1 I42L NONSYN 112 112 C 442 442 T 300 T 290 a:v:I42L:id3:baz free_text3
diff --git a/ariba/tests/data/summary_gather_unfiltered_output_data.in.1.tsv b/ariba/tests/data/summary_gather_unfiltered_output_data.in.1.tsv
index c652f1c..642064f 100644
--- a/ariba/tests/data/summary_gather_unfiltered_output_data.in.1.tsv
+++ b/ariba/tests/data/summary_gather_unfiltered_output_data.in.1.tsv
@@ -1,6 +1,6 @@
-#ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-noncoding_ref1 0 0 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 10.0 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 17 . 17 noncoding_ref1:0:0:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
-noncoding_ref2 0 0 19 78 noncoding2 120 120 98.33 noncoding2.scaffold.1 279 10.0 1 SNP n A42T 1 A42T SNP 42 42 A 84 84 T 17 . 17 noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report generic description of noncoding1
-noncoding_ref2 0 0 19 78 noncoding2 120 120 98.33 noncoding2.scaffold.1 279 10.0 1 SNP n A52T 1 A52T SNP 42 42 A 84 84 T 17 G 20,30 noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report generic description of noncoding1
-presence_absence_ref1 1 0 27 88 presence_absence1 96 96 98.96 presence_absence1.scaffold.1 267 20.1 0 SNP p A10V . A10V NONSYN 28 28 C 113 113 T 29 . 29 presence_absence_ref1:1:0:A10V:.:Ref has wild, reads have variant so report Generic description of presence_absence1
-presence_absence_ref2 1 0 528 232 presence_absence2 1005 554 99.1 presence_absence2.scaffold.1 1032 22.3 0 . p . 0 V175L NONSYN 522 522 G 265 265 C 36 . 36 . Description foo bar
+#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+ariba_noncoding_ref1 noncoding_ref1 0 0 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 10.0 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 17 T 17 noncoding_ref1:0:0:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
+ariba_noncoding_ref2 noncoding_ref2 0 0 19 78 noncoding2 120 120 98.33 noncoding2.scaffold.1 279 10.0 1 SNP n A42T 1 A42T SNP 42 42 A 84 84 T 17 T 17 noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report generic description of noncoding1
+ariba_noncoding_ref2 noncoding_ref2 0 0 19 78 noncoding2 120 120 98.33 noncoding2.scaffold.1 279 10.0 1 SNP n A52T 1 A52T SNP 42 42 A 84 84 T 17 T,G 20,30 noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report generic description of noncoding1
+ariba_presence_absence_ref1 presence_absence_ref1 1 0 27 88 presence_absence1 96 96 98.96 presence_absence1.scaffold.1 267 20.1 0 SNP p A10V . A10V NONSYN 27 29 GCA 112 114 GTA 29;28;27 G;T;A 29;28;27 presence_absence_ref1:1:0:A10V:.:Ref has wild, reads have variant so report Generic description of presence_absence1
+ariba_presence_absence_ref2 presence_absence_ref2 1 0 528 232 presence_absence2 1005 554 99.1 presence_absence2.scaffold.1 1032 22.3 0 . p . 0 V175L NONSYN 522 524 GTA 265 267 CTA 36;37;34 C;T;A 36;37;34 . Description foo bar
diff --git a/ariba/tests/data/summary_gather_unfiltered_output_data.in.2.tsv b/ariba/tests/data/summary_gather_unfiltered_output_data.in.2.tsv
index 4a23ebc..465a64e 100644
--- a/ariba/tests/data/summary_gather_unfiltered_output_data.in.2.tsv
+++ b/ariba/tests/data/summary_gather_unfiltered_output_data.in.2.tsv
@@ -1,6 +1,6 @@
-#ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-noncoding_ref1 0 0 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 50.1 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 50 G 40,10 noncoding1:0:0:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
-noncoding_ref1 0 0 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 50.1 1 SNP n A6G 1 . . 6 6 G 77 77 G 18 . 18 noncoding1:0:0:A6G:id3:variant in ref and reads so should report generic description of noncoding1
-noncoding_ref2 0 0 19 78 noncoding2 120 120 98.33 noncoding2.scaffold.1 279 10.0 1 SNP n A52T 1 A52T SNP 42 42 A 84 84 T 17 G 20,30 noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report generic description of noncoding1
-presence_absence1 1 0 27 88 presence_absence1 96 96 98.96 presence_absence1.scaffold.1 267 51.1 0 SNP p A10V . A10V NONSYN 28 28 C 113 113 T 29 . 29 presence_absence1:1:0:A10V:.:Ref has wild, reads have variant so report Generic description of presence_absence1
-variants_only1 1 1 64 12 variants_only1 90 . . . . . . . . . . . . . . . . . . . . . . .
+#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+ariba_noncoding_ref1 noncoding_ref1 0 0 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 50.1 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 50 T,G 40,10 noncoding1:0:0:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
+ariba_noncoding_ref1 noncoding_ref1 0 0 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 50.1 1 SNP n A6G 1 . . 6 6 G 77 77 G 18 G 18 noncoding1:0:0:A6G:id3:variant in ref and reads so should report generic description of noncoding1
+ariba_noncoding_ref2 noncoding_ref2 0 0 19 78 noncoding2 120 120 98.33 noncoding2.scaffold.1 279 10.0 1 SNP n A52T 1 A52T SNP 42 42 A 84 84 T 17 T,G 20,30 noncoding_ref2:0:0:A52T:id2:ref has wild type, reads have variant so should report generic description of noncoding1
+ariba_presence_absence1 presence_absence1 1 0 27 88 presence_absence1 96 96 98.96 presence_absence1.scaffold.1 267 51.1 0 SNP p A10V . A10V NONSYN 27 29 GCA 112 114 GTA 29;30;31 G;T;A 29;30;30 presence_absence1:1:0:A10V:.:Ref has wild, reads have variant so report Generic description of presence_absence1
+ariba_variants_only1 variants_only1 1 1 64 12 variants_only1 90 . . . . . . . . . . . . . . . . . . . . . . .
diff --git a/ariba/tests/data/summary_sample_test_column_names_tuples_and_het_snps.tsv b/ariba/tests/data/summary_sample_test_column_names_tuples_and_het_snps.tsv
index 159949c..15ecd21 100644
--- a/ariba/tests/data/summary_sample_test_column_names_tuples_and_het_snps.tsv
+++ b/ariba/tests/data/summary_sample_test_column_names_tuples_and_het_snps.tsv
@@ -1,8 +1,8 @@
-#ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 50 G 40,10 noncoding1:0:0:A14T:.:ref has wild type, reads have variant so should report generic description of noncoding1
-noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 0 SNP . . . G15T SNP 15 15 G 85 85 T 17 . 17 . generic description of noncoding1
-noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A6G 1 . . 6 6 G 77 77 G 18 . 18 noncoding1:0:0:A6G:id2:variant in ref and reads so should report generic description of noncoding1
-noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding2.scaffold.1 279 35.4 . . . . . . . . . . . . . . . . . generic description of noncoding2
-presence_absence1 1 0 27 88 cluster.p 96 96 98.96 presence_absence1.scaffold.1 267 35.1 1 SNP p A10V 1 A10V NONSYN 28 28 C 113 113 T 29 . 29 presence_absence1:1:0:A10V:id3:Ref has wild, reads have variant so report Generic description of presence_absence1
-presence_absence1 1 0 27 88 cluster.p 96 96 98.96 presence_absence1.scaffold.2 267 35.1 . . . . . . . . . . . . . . . . . Generic description of presence_absence2
-variants_only1 1 1 27 64 cluster.v 90 90 100.0 variants_only1.scaffold.1 260 42.4 1 SNP p S5T 1 . . 13 15 A;C;C 96 98 A;C;C 12;13;13 .;.;. 12;13;13 variants_only1:1:0:S5T:.:Ref and reads have variant so report Generic description of variants_only1
+#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+ariba_noncoding1 noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 50 T,G 40,10 noncoding1:0:0:A14T:.:ref has wild type, reads have variant so should report generic description of noncoding1
+ariba_noncoding1 noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 0 SNP . . . G15T SNP 15 15 G 85 85 T 17 T 17 . generic description of noncoding1
+ariba_noncoding1 noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A6G 1 . . 6 6 G 77 77 G 18 G 18 noncoding1:0:0:A6G:id2:variant in ref and reads so should report generic description of noncoding1
+ariba_noncoding1 noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding2.scaffold.1 279 35.4 . . . . . . . . . . . . . . . . . generic description of noncoding2
+ariba_presence_absence1 presence_absence1 1 0 27 88 cluster.p 96 96 98.96 presence_absence1.scaffold.1 267 35.1 1 SNP p A10V 1 A10V NONSYN 27 29 GCA 112 114 GTA 29;30;31 G;T;A 29;30;31 presence_absence1:1:0:A10V:id3:Ref has wild, reads have variant so report Generic description of presence_absence1
+ariba_presence_absence1 presence_absence1 1 0 27 88 cluster.p 96 96 98.96 presence_absence1.scaffold.2 267 35.1 . . . . . . . . . . . . . . . . . Generic description of presence_absence2
+ariba_variants_only1 variants_only1 1 1 27 64 cluster.v 90 90 100.0 variants_only1.scaffold.1 260 42.4 1 SNP p S5T 1 . . 13 15 ACC 96 98 ACC 12;13;13 A;C;C 12;13;13 variants_only1:1:0:S5T:.:Ref and reads have variant so report Generic description of variants_only1
diff --git a/ariba/tests/data/summary_sample_test_column_summary_data.tsv b/ariba/tests/data/summary_sample_test_column_summary_data.tsv
index 9c495ec..fa56464 100644
--- a/ariba/tests/data/summary_sample_test_column_summary_data.tsv
+++ b/ariba/tests/data/summary_sample_test_column_summary_data.tsv
@@ -1,8 +1,8 @@
-#ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 17 . 17 noncoding1:0:0:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
-noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 0 SNP . . . G15T SNP 15 15 G 85 85 T 17 . 17 . generic description of noncoding1
-noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A6G 1 . . 6 6 G 77 77 G 18 . 18 noncoding1:0:0:A6G:id2:variant in ref and reads so should report generic description of noncoding1
-noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding2.scaffold.1 279 35.4 . . . . . . . . . . . . . . . . . generic description of noncoding2
-presence_absence1 1 0 27 88 cluster.p 96 96 98.96 presence_absence1.scaffold.1 267 35.1 1 SNP p A10V 1 A10V NONSYN 28 28 C 113 113 T 29 . 29 presence_absence1:1:0:A10V:id3:Ref has wild, reads have variant so report Generic description of presence_absence1
-presence_absence1 1 0 27 88 cluster.p 96 96 98.96 presence_absence1.scaffold.2 267 35.1 . . . . . . . . . . . . . . . . . Generic description of presence_absence2
-variants_only1 1 1 27 64 cluster.v 90 90 100.0 variants_only1.scaffold.1 260 42.4 1 SNP p S5T 1 . . 13 15 A;C;C 96 98 A;C;C 12;13;13 .;.;. 12;13;13 variants_only1:1:0:S5T:id4:Ref and reads have variant so report Generic description of variants_only1
+#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+ariba_noncoding1 noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 17 T 17 noncoding1:0:0:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
+ariba_noncoding1 noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 0 SNP . . . G15T SNP 15 15 G 85 85 T 17 T 17 . generic description of noncoding1
+ariba_noncoding1 noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A6G 1 . . 6 6 G 77 77 G 18 G 18 noncoding1:0:0:A6G:id2:variant in ref and reads so should report generic description of noncoding1
+ariba_noncoding1 noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding2.scaffold.1 279 35.4 . . . . . . . . . . . . . . . . . generic description of noncoding2
+ariba_presence_absence1 presence_absence1 1 0 27 88 cluster.p 96 96 98.96 presence_absence1.scaffold.1 267 35.1 1 SNP p A10V 1 A10V NONSYN 27 29 GCA 112 114 GTA 29;30;31 G;T;A 29;30;31 presence_absence1:1:0:A10V:id3:Ref has wild, reads have variant so report Generic description of presence_absence1
+ariba_presence_absence1 presence_absence1 1 0 27 88 cluster.p 96 96 98.96 presence_absence1.scaffold.2 267 35.1 . . . . . . . . . . . . . . . . . Generic description of presence_absence2
+ariba_variants_only1 variants_only1 1 1 27 64 cluster.v 90 90 100.0 variants_only1.scaffold.1 260 42.4 1 SNP p S5T 1 . . 13 15 ACC 96 98 ACC 12;13;13 A;C;C 12;13;13 variants_only1:1:0:S5T:id4:Ref and reads have variant so report Generic description of variants_only1
diff --git a/ariba/tests/data/summary_sample_test_load_file.in.tsv b/ariba/tests/data/summary_sample_test_load_file.in.tsv
index a125211..d390d82 100644
--- a/ariba/tests/data/summary_sample_test_load_file.in.tsv
+++ b/ariba/tests/data/summary_sample_test_load_file.in.tsv
@@ -1,7 +1,7 @@
-#ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 17 . 17 noncoding1:n:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
-noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A6G 1 . . 6 6 G 77 77 G 18 . 18 noncoding1:n:A6G:id2:variant in ref and reads so should report generic description of noncoding1
-noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding2.scaffold.1 279 35.4 . . . . . . . . . . . . . . . . . generic description of noncoding2
-presence_absence1 1 0 27 88 cluster.p 96 96 98.96 presence_absence1.scaffold.1 267 35.1 1 SNP p A10V 1 A10V NONSYN 28 28 C 113 113 T 29 . 29 presence_absence1:p:A10V:id3:Ref has wild, reads have variant so report Generic description of presence_absence1
-presence_absence1 1 0 27 88 cluster.p 96 96 98.96 presence_absence1.scaffold.2 267 35.1 . . . . . . . . . . . . . . . . . Generic description of presence_absence2
-variants_only1 1 1 27 64 cluster.v 90 90 100.0 variants_only1.scaffold.1 260 42.4 1 SNP p S5T 1 . . 13 15 A;C;C 96 98 A;C;C 12;13;13 .;.;. 12;13;13 variants_only1:p:S5T:id4:Ref and reads have variant so report Generic description of variants_only1
+#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+ariba_noncoding1 noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 17 T 17 noncoding1:n:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
+ariba_noncoding1 noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A6G 1 . . 6 6 G 77 77 G 18 G 18 noncoding1:n:A6G:id2:variant in ref and reads so should report generic description of noncoding1
+ariba_noncoding1 noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding2.scaffold.1 279 35.4 . . . . . . . . . . . . . . . . . generic description of noncoding2
+ariba_presence_absence1 presence_absence1 1 0 27 88 cluster.p 96 96 98.96 presence_absence1.scaffold.1 267 35.1 1 SNP p A10V 1 A10V NONSYN 27 29 GCA 112 114 GTA 29;30;31 G;T;A 29;30;31 presence_absence1:p:A10V:id3:Ref has wild, reads have variant so report Generic description of presence_absence1
+ariba_presence_absence1 presence_absence1 1 0 27 88 cluster.p 96 96 98.96 presence_absence1.scaffold.2 267 35.1 . . . . . . . . . . . . . . . . . Generic description of presence_absence2
+ariba_variants_only1 variants_only1 1 1 27 64 cluster.v 90 90 100.0 variants_only1.scaffold.1 260 42.4 1 SNP p S5T 1 . . 13 15 ACC 96 98 ACC 12;13;13 A;C;C 12;13;13 variants_only1:p:S5T:id4:Ref and reads have variant so report Generic description of variants_only1
diff --git a/ariba/tests/data/summary_sample_test_non_synon_variants.tsv b/ariba/tests/data/summary_sample_test_non_synon_variants.tsv
index b8f5753..c80f2bc 100644
--- a/ariba/tests/data/summary_sample_test_non_synon_variants.tsv
+++ b/ariba/tests/data/summary_sample_test_non_synon_variants.tsv
@@ -1,8 +1,8 @@
-#ref_name ref_type flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-noncoding1 non_coding 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 17 . 17 noncoding1:n:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
-noncoding1 non_coding 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 0 SNP . . . G15T SNP 15 15 G 85 85 T 17 . 17 . generic description of noncoding1
-noncoding1 non_coding 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A6G 1 . . 6 6 G 77 77 G 18 . 18 noncoding1:n:A6G:id2:variant in ref and reads so should report generic description of noncoding1
+#ref_name ref_type flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+noncoding1 non_coding 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 17 T 17 noncoding1:n:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
+noncoding1 non_coding 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 0 SNP . . . G15T SNP 15 15 G 85 85 T 17 T 17 . generic description of noncoding1
+noncoding1 non_coding 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A6G 1 . . 6 6 G 77 77 G 18 G 18 noncoding1:n:A6G:id2:variant in ref and reads so should report generic description of noncoding1
noncoding1 non_coding 19 78 cluster.n 120 120 98.33 noncoding2.scaffold.1 279 35.4 . . . . . . . . . . . . . . . . . generic description of noncoding2
-presence_absence1 presence_absence 27 88 cluster.p 96 96 98.96 presence_absence1.scaffold.1 267 35.1 1 SNP p A10V 1 A10V NONSYN 28 28 C 113 113 T 29 . 29 presence_absence1:p:A10V:id3:Ref has wild, reads have variant so report Generic description of presence_absence1
+presence_absence1 presence_absence 27 88 cluster.p 96 96 98.96 presence_absence1.scaffold.1 267 35.1 1 SNP p A10V 1 A10V NONSYN 27 29 GCA 112 114 GTA 29;30;31 G;T;A 29;30;31 presence_absence1:p:A10V:id3:Ref has wild, reads have variant so report Generic description of presence_absence1
presence_absence1 presence_absence 27 88 cluster.p 96 96 98.96 presence_absence1.scaffold.2 267 35.1 . . . . . . . . . . . . . . . . . Generic description of presence_absence2
-variants_only1 variants_only 27 64 cluster.v 90 90 100.0 variants_only1.scaffold.1 260 42.4 1 SNP p S5T 1 . . 13 15 A;C;C 96 98 A;C;C 12;13;13 .;.;. 12;13;13 variants_only1:p:S5T:id4:Ref and reads have variant so report Generic description of variants_only1
+variants_only1 variants_only 27 64 cluster.v 90 90 100.0 variants_only1.scaffold.1 260 42.4 1 SNP p S5T 1 . . 13 15 ACC 96 98 ACC 12;13;13 A;C;C 12;13;13 variants_only1:p:S5T:id4:Ref and reads have variant so report Generic description of variants_only1
diff --git a/ariba/tests/data/summary_sample_test_var_groups.tsv b/ariba/tests/data/summary_sample_test_var_groups.tsv
index 3352660..5360e33 100644
--- a/ariba/tests/data/summary_sample_test_var_groups.tsv
+++ b/ariba/tests/data/summary_sample_test_var_groups.tsv
@@ -1,7 +1,7 @@
-#ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 17 . 17 noncoding1:0:0:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
-noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A6G 1 . . 6 6 G 77 77 G 18 . 18 noncoding1:0:0:A6G:id2:variant in ref and reads so should report generic description of noncoding1
-noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding2.scaffold.1 279 35.4 . . . . . . . . . . . . . . . . . generic description of noncoding2
-presence_absence1 1 0 27 88 cluster.p 96 96 98.96 presence_absence1.scaffold.1 267 35.1 1 SNP p A10V 1 A10V NONSYN 28 28 C 113 113 T 29 . 29 presence_absence1:1:0:A10V:id3:Ref has wild, reads have variant so report Generic description of presence_absence1
-presence_absence1 1 0 27 88 cluster.p 96 96 98.96 presence_absence1.scaffold.2 267 35.1 . . . . . . . . . . . . . . . . . Generic description of presence_absence2
-variants_only1 1 1 27 64 cluster.v 90 90 100.0 variants_only1.scaffold.1 260 42.4 1 SNP p S5T 1 . . 13 15 A;C;C 96 98 A;C;C 12;13;13 .;.;. 12;13;13 variants_only1:1:0:S5T:id4:Ref and reads have variant so report Generic description of variants_only1
+#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+ariba_noncoding1 noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 17 T 17 noncoding1:0:0:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
+ariba_noncoding1 noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding1.scaffold.1 279 35.4 1 SNP n A6G 1 . . 6 6 G 77 77 G 18 G 18 noncoding1:0:0:A6G:id2:variant in ref and reads so should report generic description of noncoding1
+ariba_noncoding1 noncoding1 0 0 19 78 cluster.n 120 120 98.33 noncoding2.scaffold.1 279 35.4 . . . . . . . . . . . . . . . . . generic description of noncoding2
+ariba_presence_absence1 presence_absence1 1 0 27 88 cluster.p 96 96 98.96 presence_absence1.scaffold.1 267 35.1 1 SNP p A10V 1 A10V NONSYN 27 29 GCA 112 114 GTA 29;30;31 G;T;A 29;20;31 presence_absence1:1:0:A10V:id3:Ref has wild, reads have variant so report Generic description of presence_absence1
+ariba_presence_absence1 presence_absence1 1 0 27 88 cluster.p 96 96 98.96 presence_absence1.scaffold.2 267 35.1 . . . . . . . . . . . . . . . . . Generic description of presence_absence2
+ariba_variants_only1 variants_only1 1 1 27 64 cluster.v 90 90 100.0 variants_only1.scaffold.1 260 42.4 1 SNP p S5T 1 . . 13 15 ACC 96 98 ACC 12;13;13 A;C;C 12;13;13 variants_only1:1:0:S5T:id4:Ref and reads have variant so report Generic description of variants_only1
diff --git a/ariba/tests/data/summary_test_load_input_files.1.tsv b/ariba/tests/data/summary_test_load_input_files.1.tsv
index 1b683a4..e0a7d79 100644
--- a/ariba/tests/data/summary_test_load_input_files.1.tsv
+++ b/ariba/tests/data/summary_test_load_input_files.1.tsv
@@ -1,3 +1,3 @@
-#ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-noncoding1 0 0 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 10.0 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 17 . 17 noncoding1:n:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
-presence_absence1 1 0 27 88 presence_absence1 96 96 98.96 presence_absence1.scaffold.1 267 20.1 1 SNP p A10V 1 A10V NONSYN 28 28 C 113 113 T 29 . 29 presence_absence1:p:A10V:id2:Ref has wild, reads have variant so report Generic description of presence_absence1
+#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+ariba_noncoding1 noncoding1 0 0 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 10.0 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 17 T 17 noncoding1:n:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
+ariba_presence_absence1 presence_absence1 1 0 27 88 presence_absence1 96 96 98.96 presence_absence1.scaffold.1 267 20.1 1 SNP p A10V 1 A10V NONSYN 27 29 GCA 112 114 GTA 29;30;31 G;T;A 29;30;31 presence_absence1:p:A10V:id2:Ref has wild, reads have variant so report Generic description of presence_absence1
diff --git a/ariba/tests/data/summary_test_load_input_files.2.tsv b/ariba/tests/data/summary_test_load_input_files.2.tsv
index ccaa3d7..541dac5 100644
--- a/ariba/tests/data/summary_test_load_input_files.2.tsv
+++ b/ariba/tests/data/summary_test_load_input_files.2.tsv
@@ -1,5 +1,5 @@
-#ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-noncoding1 0 0 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 50.1 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 17 . 17 noncoding1:n:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
-noncoding1 0 0 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 50.1 1 SNP n A6G 1 . . 6 6 G 77 77 G 18 . 18 noncoding1:n:A6G:id2:variant in ref and reads so should report generic description of noncoding1
-presence_absence1 1 0 27 88 presence_absence1 96 96 98.96 presence_absence1.scaffold.1 267 51.1 1 SNP p A10V 1 A10V NONSYN 28 28 C 113 113 T 29 . 29 presence_absence1:p:A10V:id3:Ref has wild, reads have variant so report Generic description of presence_absence1
-variants_only1 1 1 64 12 variants_only1 90 . . . . . . . . . . . . . . . . . . . . . . .
+#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+ariba_noncoding1 noncoding1 0 0 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 50.1 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 17 T 17 noncoding1:n:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
+ariba_noncoding1 noncoding1 0 0 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 50.1 1 SNP n A6G 1 . . 6 6 G 77 77 G 18 G 18 noncoding1:n:A6G:id2:variant in ref and reads so should report generic description of noncoding1
+ariba_presence_absence1 presence_absence1 1 0 27 88 presence_absence1 96 96 98.96 presence_absence1.scaffold.1 267 51.1 1 SNP p A10V 1 A10V NONSYN 27 29 GCA 112 114 GTA 29;30;31 G;T;A 29;30;31 presence_absence1:p:A10V:id3:Ref has wild, reads have variant so report Generic description of presence_absence1
+ariba_variants_only1 variants_only1 1 1 64 12 variants_only1 90 . . . . . . . . . . . . . . . . . . . . . . .
diff --git a/ariba/tests/data/summary_test_whole_run.in.1.tsv b/ariba/tests/data/summary_test_whole_run.in.1.tsv
index 4321687..dfc4d2b 100644
--- a/ariba/tests/data/summary_test_whole_run.in.1.tsv
+++ b/ariba/tests/data/summary_test_whole_run.in.1.tsv
@@ -1,16 +1,18 @@
-#ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-noncoding1_ref1 0 0 19 100 noncoding1 100 100 99.1 noncoding1.scaffold.1 150 10.1 . . . . . . . . . . . . . . . . . generic description of noncoding1
-noncoding2_ref1 0 0 19 100 noncoding2 200 200 98.2 noncoding2.scaffold.1 250 42.42 . . . . . . . . . . . . . . . . . generic description of noncoding2
-noncoding5_ref1 0 1 531 100 noncoding5 100 100 97.4 noncoding5.scaffold.1 200 14.1 1 SNP n A42T 1 A42T SNP 42 42 A 50 50 T 40 . 40 noncoding5_ref1:0:1:A42T:.:description of A42T .
-noncoding6_ref1 0 0 531 100 noncoding6 100 100 95.5 noncoding6.scaffold.1 200 24.32 1 SNP n A52T 1 A52T SNP 52 52 A 70 70 T 100 C 70,30 noncoding6_ref1:0:1:A52T:.:description of A52T .
-noncoding7_ref1 0 0 531 100 noncoding7 100 100 95.4 noncoding7.scaffold.1 200 24.31 1 SNP n A53T 1 A53T SNP 53 53 A 70 70 T 100 C 70,1 noncoding7_ref1:0:1:A53T:.:description of A53T .
-noncoding8_ref1 0 0 531 100 noncoding8 100 100 95.3 noncoding8.scaffold.1 200 24.29 1 SNP n A54T 0 . . 54 54 A 70 70 A 100 . . noncoding8_ref1:0:1:A54T:.:description of A54T .
-noncoding9_ref1 0 1 531 100 noncoding9 100 100 95.2 noncoding9.scaffold.1 200 24.28 1 SNP n A55T 0 . . 55 55 A 70 70 A 100 . . noncoding9_ref1:0:1:A55T:.:description of A55T .
-noncoding10_ref1 0 0 531 100 noncoding10 100 100 95.1 noncoding10.scaffold.1 200 24.27 0 . n . 0 C100T SNP 100 100 C 150 150 T 100 A 99,1 . .
-noncoding11_ref1 0 0 531 100 noncoding11 100 100 95.05 noncoding11.scaffold.1 200 24.26 0 HET . . . G101A . 100 100 G 150 150 G 100 A 70,30 . .
-coding1_ref1 1 0 19 100 coding1 100 100 99.1 coding1.scaffold.1 150 10.1 . . . . . . . . . . . . . . . . . generic description of coding1
-coding2_ref1 1 0 27 100 coding2 200 200 98.2 coding2.scaffold.1 250 42.42 . . . . . . . . . . . . . . . . . generic description of coding2
-coding5_ref1 1 1 539 100 coding5 100 100 97.4 coding5.scaffold.1 200 14.1 1 SNP p A42S 0 . . 142 144 A;G;A 50 52 A;G;A 60;61;62 .;.;. 60;61;62 coding5_ref1:0:1:A42S:.:description of A42S .
-coding6_ref1 1 0 539 100 coding6 100 100 95.5 coding6.scaffold.1 200 24.32 1 SNP p A52S 1 A52S NONSYN 152 152 A 70 70 T 50 . 50 coding6_ref1:0:1:A52S:.:description of A52S .
-coding7_ref1 1 1 539 100 coding7 100 100 95.4 coding7.scaffold.1 200 24.32 1 SNP p A53S 0 . . 152 154 A;G;A 70 70 A;G;A 71;72;73 .;.;. 71;72;73 coding7_ref1:0:1:A53S:.:description of A53S .
-coding8_ref1 1 0 539 100 coding8 100 100 95.3 coding8.scaffold.1 200 24.31 0 . p . 0 A53S NONSYN 160 160 A 75 75 T 100 G 100,1 . .
+#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+ariba_noncoding1_ref1 noncoding1_ref1 0 0 19 100 noncoding1 100 100 99.1 noncoding1.scaffold.1 150 10.1 . . . . . . . . . . . . . . . . . generic description of noncoding1
+ariba_noncoding2_ref1 noncoding2_ref1 0 0 19 100 noncoding2 200 200 98.2 noncoding2.scaffold.1 250 42.42 . . . . . . . . . . . . . . . . . generic description of noncoding2
+ariba_noncoding5_ref1 noncoding5_ref1 0 1 531 100 noncoding5 100 100 97.4 noncoding5.scaffold.1 200 14.1 1 SNP n A42T 1 A42T SNP 42 42 A 50 50 T 40 T 40 noncoding5_ref1:0:1:A42T:.:description of A42T .
+ariba_noncoding6_ref1 noncoding6_ref1 0 0 531 100 noncoding6 100 100 95.5 noncoding6.scaffold.1 200 24.32 1 SNP n A52T 1 A52T SNP 52 52 A 70 70 T 100 T,C 70,30 noncoding6_ref1:0:1:A52T:.:description of A52T .
+ariba_noncoding7_ref1 noncoding7_ref1 0 0 531 100 noncoding7 100 100 95.4 noncoding7.scaffold.1 200 24.31 1 SNP n A53T 1 A53T SNP 53 53 A 70 70 T 100 T,C 70,1 noncoding7_ref1:0:1:A53T:.:description of A53T .
+ariba_noncoding8_ref1 noncoding8_ref1 0 0 531 100 noncoding8 100 100 95.3 noncoding8.scaffold.1 200 24.29 1 SNP n A54T 0 . . 54 54 A 70 70 A 100 . . noncoding8_ref1:0:1:A54T:.:description of A54T .
+ariba_noncoding9_ref1 noncoding9_ref1 0 1 531 100 noncoding9 100 100 95.2 noncoding9.scaffold.1 200 24.28 1 SNP n A55T 0 . . 55 55 A 70 70 A 100 . . noncoding9_ref1:0:1:A55T:.:description of A55T .
+ariba_noncoding10_ref1 noncoding10_ref1 0 0 531 100 noncoding10 100 100 95.1 noncoding10.scaffold.1 200 24.27 0 . n . 0 C100T SNP 100 100 C 150 150 T 100 T,A 99,1 . .
+ariba_noncoding11_ref1 noncoding11_ref1 0 0 531 100 noncoding11 100 100 95.05 noncoding11.scaffold.1 200 24.26 0 HET . . . G101A . 100 100 G 150 150 G 100 G,A 70,30 . .
+ariba_coding1_ref1 coding1_ref1 1 0 19 100 coding1 100 100 99.1 coding1.scaffold.1 150 10.1 . . . . . . . . . . . . . . . . . generic description of coding1
+ariba_coding2_ref1 coding2_ref1 1 0 27 100 coding2 200 200 98.2 coding2.scaffold.1 250 42.42 . . . . . . . . . . . . . . . . . generic description of coding2
+ariba_coding5_ref1 coding5_ref1 1 1 539 100 coding5 100 100 97.4 coding5.scaffold.1 200 14.1 1 SNP p A42S 0 . . 142 144 GCA 50 52 GCA 60;61;62 G;C;A 60;61;62 coding5_ref1:0:1:A42S:.:description of A42S .
+ariba_coding6_ref1 coding6_ref1 1 0 539 100 coding6 100 100 95.5 coding6.scaffold.1 200 24.32 1 SNP p A52S 1 A52S NONSYN 151 153 GCA 68 70 TCA 50;49;51 T;C;A 50;49;51 coding6_ref1:0:1:A52S:.:description of A52S .
+ariba_coding7_ref1 coding7_ref1 1 1 539 100 coding7 100 100 95.4 coding7.scaffold.1 200 24.32 1 SNP p A53S 0 . . 154 156 GCA 71 73 GCA 71;72;73 G;C;A 71;72;73 coding7_ref1:0:1:A53S:.:description of A53S .
+ariba_coding8_ref1 coding8_ref1 1 0 539 100 coding8 100 100 95.3 coding8.scaffold.1 200 24.31 0 . p . 0 A53S NONSYN 160 162 GCA 74 76 TCA 100,1;95,99 T,G;C;A 100,1;95;99 . .
+ariba_23S.rDNA_WHO_F_01358c 23S.rDNA_WHO_F_01358c 0 1 531 9914 23S 2890 2890 99.86 23S.scaffold.1 3120 744.8 1 SNP n C2597T 1 C2597T SNP 2597 2597 C 2755 2755 T 823 TC,T 487,1 23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T. High-level resistance to Azithromycin
+ariba_mdfA.3001328.JQ394987.0_1233.561 mdfA.3001328.JQ394987.0_1233.561 1 0 659 336 mdfA 1233 1233 97.0 mdfA.scaffold.1 1464 16.2 0 HET . . . G261GGGTGTGGTGTGGT,GGGTGTGGT . 261 261 G 282 282 G 20 GGGTGTGGTGTGGT,GGGTGTGGT 17,2 . mdfA;Multidrug translocase MdfA
diff --git a/ariba/tests/data/summary_test_whole_run.in.2.tsv b/ariba/tests/data/summary_test_whole_run.in.2.tsv
index b92dd94..c6c4caa 100644
--- a/ariba/tests/data/summary_test_whole_run.in.2.tsv
+++ b/ariba/tests/data/summary_test_whole_run.in.2.tsv
@@ -1,7 +1,8 @@
-#ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-noncoding1_ref2 0 0 19 100 noncoding1 100 100 99.2 noncoding1.scaffold.1 150 10.1 . . . . . . . . . . . . . . . . . generic description of noncoding1
-noncoding3_ref1 0 0 19 100 noncoding3 242 241 97.6 noncoding3.scaffold.1 300 37.6 . . . . . . . . . . . . . . . . . generic description of noncoding3
-noncoding5_ref1 0 1 531 100 noncoding5 100 100 99.42 noncoding5.scaffold.1 200 14.1 1 SNP n A42T 0 A42T SNP 42 42 A 50 50 A 20 . 20 . .
-coding1_ref2 1 0 27 100 coding1 100 100 99.2 coding1.scaffold.1 150 10.1 . . . . . . . . . . . . . . . . . generic description of coding1
-coding3_ref1 1 0 27 100 coding3 242 241 97.6 coding3.scaffold.1 300 37.6 . . . . . . . . . . . . . . . . . generic description of coding3
-coding5_ref1 1 1 539 100 coding5 100 100 97.4 coding5.scaffold.1 200 14.1 1 SNP p A42S 1 A42S NONSYN 142 144 A 50 50 T 65 . 65 coding5_ref1:0:1:A42S:.:description of A42S .
+#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+ariba_noncoding1_ref2 noncoding1_ref2 0 0 19 100 noncoding1 100 100 99.2 noncoding1.scaffold.1 150 10.1 . . . . . . . . . . . . . . . . . generic description of noncoding1
+ariba_noncoding3_ref1 noncoding3_ref1 0 0 19 100 noncoding3 242 241 97.6 noncoding3.scaffold.1 300 37.6 . . . . . . . . . . . . . . . . . generic description of noncoding3
+ariba_noncoding5_ref1 noncoding5_ref1 0 1 531 100 noncoding5 100 100 99.42 noncoding5.scaffold.1 200 14.1 1 SNP n A42T 0 A42T SNP 42 42 A 50 50 A 20 A 20 noncoding5_ref1:0:1:A42T:.:description of A42T .
+ariba_coding1_ref2 coding1_ref2 1 0 27 100 coding1 100 100 99.2 coding1.scaffold.1 150 10.1 . . . . . . . . . . . . . . . . . generic description of coding1
+ariba_coding3_ref1 coding3_ref1 1 0 27 100 coding3 242 241 97.6 coding3.scaffold.1 300 37.6 . . . . . . . . . . . . . . . . . generic description of coding3
+ariba_coding5_ref1 coding5_ref1 1 1 539 100 coding5 100 100 97.4 coding5.scaffold.1 200 14.1 1 SNP p A42S 1 A42S NONSYN 142 144 GCA 50 52 TCA 65;64;63 T;C;A 65;64;63 coding5_ref1:0:1:A42S:.:description of A42S .
+ariba_23S.rDNA_WHO_F_01358c 23S.rDNA_WHO_F_01358c 0 1 659 4168 23S 2890 2890 99.84 23S.scaffold.1 3628 344.0 1 SNP n C2597T 0 . . 2597 2597 C 2928 2928 C 410 C,T 301,44 23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T. High-level resistance to Azithromycin. .
diff --git a/ariba/tests/data/summary_test_whole_run.out.csv b/ariba/tests/data/summary_test_whole_run.out.csv
index 9bd78e5..1a282e3 100644
--- a/ariba/tests/data/summary_test_whole_run.out.csv
+++ b/ariba/tests/data/summary_test_whole_run.out.csv
@@ -1,3 +1,3 @@
-name,coding1.assembled,coding1.match,coding1.ref_seq,coding1.pct_id,coding2.assembled,coding2.match,coding2.ref_seq,coding2.pct_id,coding3.assembled,coding3.match,coding3.ref_seq,coding3.pct_id,coding5.assembled,coding5.match,coding5.ref_seq,coding5.pct_id,coding5.known_var,coding5.A42S,coding6.assembled,coding6.match,coding6.ref_seq,coding6.pct_id,coding6.known_var,coding6.A52S,coding7.assembled,coding7.ref_seq,coding7.pct_id,coding8.assembled,coding8.match,coding8.ref_seq,coding8.pct_i [...]
-/home/ubuntu/sanger-pathogens/ariba/ariba/tests/data/summary_test_whole_run.in.1.tsv,interrupted,no,coding1_ref1,99.1,yes,yes,coding2_ref1,98.2,no,no,NA,NA,yes,no,coding5_ref1,97.4,no,no,yes,yes,coding6_ref1,95.5,yes,yes,yes,coding7_ref1,95.4,yes,yes,coding8_ref1,95.3,yes,yes,yes,yes,noncoding1_ref1,99.1,yes,yes,noncoding10_ref1,95.1,yes,yes,99.0,yes,yes,noncoding11_ref1,95.05,yes,het,30.0,yes,yes,noncoding2_ref1,98.2,no,no,NA,NA,yes,yes,noncoding5_ref1,97.4,yes,yes,yes,yes,noncoding6_re [...]
-/home/ubuntu/sanger-pathogens/ariba/ariba/tests/data/summary_test_whole_run.in.2.tsv,yes,yes,coding1_ref2,99.2,no,no,NA,NA,yes,yes,coding3_ref1,97.6,yes,yes,coding5_ref1,97.4,yes,yes,no,no,NA,NA,NA,NA,no,NA,NA,no,no,NA,NA,NA,NA,yes,yes,noncoding1_ref2,99.2,no,no,NA,NA,NA,NA,NA,no,no,NA,NA,NA,NA,NA,no,no,NA,NA,yes,yes,noncoding3_ref1,97.6,yes,no,noncoding5_ref1,99.42,no,no,no,no,NA,NA,NA,NA,NA,no,no,NA,NA,NA,NA,NA,no,no,NA,NA,no,NA,NA
+name,23S.assembled,23S.match,23S.ref_seq,23S.pct_id,23S.known_var,23S.C2597T,23S.C2597T.%,coding1.assembled,coding1.match,coding1.ref_seq,coding1.pct_id,coding2.assembled,coding2.match,coding2.ref_seq,coding2.pct_id,coding3.assembled,coding3.match,coding3.ref_seq,coding3.pct_id,coding5.assembled,coding5.match,coding5.ref_seq,coding5.pct_id,coding5.known_var,coding5.A42S,coding6.assembled,coding6.match,coding6.ref_seq,coding6.pct_id,coding6.known_var,coding6.A52S,coding7.assembled,coding7 [...]
+/nfs/users/nfs_m/mh12/sanger-pathogens/ariba/ariba/tests/data/summary_test_whole_run.in.1.tsv,yes,yes,23S.rDNA_WHO_F_01358c,99.86,yes,yes,100.0,interrupted,no,coding1_ref1,99.1,yes,yes,coding2_ref1,98.2,no,no,NA,NA,yes,no,coding5_ref1,97.4,no,no,yes,yes,coding6_ref1,95.5,yes,yes,yes,coding7_ref1,95.4,yes,yes,coding8_ref1,95.3,yes,yes,interrupted,mdfA.3001328.JQ394987.0_1233.561,97.0,yes,yes,yes,yes,noncoding1_ref1,99.1,yes,yes,noncoding10_ref1,95.1,yes,yes,99.0,yes,yes,noncoding11_ref1,9 [...]
+/nfs/users/nfs_m/mh12/sanger-pathogens/ariba/ariba/tests/data/summary_test_whole_run.in.2.tsv,yes_nonunique,no,23S.rDNA_WHO_F_01358c,99.84,het,het,12.8,yes,yes,coding1_ref2,99.2,no,no,NA,NA,yes,yes,coding3_ref1,97.6,yes,yes,coding5_ref1,97.4,yes,yes,no,no,NA,NA,NA,NA,no,NA,NA,no,no,NA,NA,NA,NA,no,NA,NA,NA,NA,yes,yes,noncoding1_ref2,99.2,no,no,NA,NA,NA,NA,NA,no,no,NA,NA,NA,NA,NA,no,no,NA,NA,yes,yes,noncoding3_ref1,97.6,yes,no,noncoding5_ref1,99.42,no,no,NA,no,no,NA,NA,NA,NA,NA,no,no,NA,NA [...]
diff --git a/ariba/tests/data/summary_to_matrix.1.tsv b/ariba/tests/data/summary_to_matrix.1.tsv
index 1957349..fac2f53 100644
--- a/ariba/tests/data/summary_to_matrix.1.tsv
+++ b/ariba/tests/data/summary_to_matrix.1.tsv
@@ -1,5 +1,5 @@
-#ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-noncoding_ref1 0 0 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 10.0 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 17 . 17 noncoding_ref1:0:0:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
-noncoding_ref2 0 0 19 78 noncoding2 120 120 98.33 noncoding2.scaffold.1 279 10.0 1 SNP n A42T 1 A42T SNP 42 42 A 84 84 T 17 . 17 noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report generic description of noncoding1
-noncoding_ref2 0 0 19 78 noncoding2 120 120 98.33 noncoding2.scaffold.1 279 10.0 1 SNP n A52T 1 A52T SNP 42 42 A 84 84 T 17 G 20,30 noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report generic description of noncoding1
-presence_absence_ref1 1 0 27 88 presence_absence1 96 96 98.96 presence_absence1.scaffold.1 267 20.1 0 SNP p A10V . A10V NONSYN 28 28 C 113 113 T 29 . 29 presence_absence_ref1:1:0:A10V:.:Ref has wild, reads have variant so report Generic description of presence_absence1
+#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+ariba_noncoding_ref1 noncoding_ref1 0 0 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 10.0 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 17 T 17 noncoding_ref1:0:0:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
+ariba_noncoding_ref2 noncoding_ref2 0 0 19 78 noncoding2 120 120 98.33 noncoding2.scaffold.1 279 10.0 1 SNP n A42T 1 A42T SNP 42 42 A 84 84 T 17 T 17 noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report generic description of noncoding1
+ariba_noncoding_ref2 noncoding_ref2 0 0 19 78 noncoding2 120 120 98.33 noncoding2.scaffold.1 279 10.0 1 SNP n A52T 1 A52T SNP 42 42 A 84 84 T 17 T,G 20,30 noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report generic description of noncoding1
+ariba_presence_absence_ref1 presence_absence_ref1 1 0 27 88 presence_absence1 96 96 98.96 presence_absence1.scaffold.1 267 20.1 0 SNP p A10V . A10V NONSYN 27 29 GCA 112 114 GTA 29;28;26 G;T;A 29;28;26 presence_absence_ref1:1:0:A10V:.:Ref has wild, reads have variant so report Generic description of presence_absence1
diff --git a/ariba/tests/data/summary_to_matrix.2.tsv b/ariba/tests/data/summary_to_matrix.2.tsv
index 4a23ebc..296c84d 100644
--- a/ariba/tests/data/summary_to_matrix.2.tsv
+++ b/ariba/tests/data/summary_to_matrix.2.tsv
@@ -1,6 +1,6 @@
-#ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_alt_nt smtls_alt_depth var_description free_text
-noncoding_ref1 0 0 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 50.1 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 50 G 40,10 noncoding1:0:0:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
-noncoding_ref1 0 0 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 50.1 1 SNP n A6G 1 . . 6 6 G 77 77 G 18 . 18 noncoding1:0:0:A6G:id3:variant in ref and reads so should report generic description of noncoding1
-noncoding_ref2 0 0 19 78 noncoding2 120 120 98.33 noncoding2.scaffold.1 279 10.0 1 SNP n A52T 1 A52T SNP 42 42 A 84 84 T 17 G 20,30 noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report generic description of noncoding1
-presence_absence1 1 0 27 88 presence_absence1 96 96 98.96 presence_absence1.scaffold.1 267 51.1 0 SNP p A10V . A10V NONSYN 28 28 C 113 113 T 29 . 29 presence_absence1:1:0:A10V:.:Ref has wild, reads have variant so report Generic description of presence_absence1
-variants_only1 1 1 64 12 variants_only1 90 . . . . . . . . . . . . . . . . . . . . . . .
+#ariba_ref_name ref_name gene var_only flag reads cluster ref_len ref_base_assembled pc_ident ctg ctg_len ctg_cov known_var var_type var_seq_type known_var_change has_known_var ref_ctg_change ref_ctg_effect ref_start ref_end ref_nt ctg_start ctg_end ctg_nt smtls_total_depth smtls_nts smtls_nts_depth var_description free_text
+ariba_noncoding_ref1 noncoding_ref1 0 0 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 50.1 1 SNP n A14T 1 A14T SNP 13 13 A 84 84 T 50 T,G 40,10 noncoding1:0:0:A14T:id1:ref has wild type, reads have variant so should report generic description of noncoding1
+ariba_noncoding_ref1 noncoding_ref1 0 0 19 78 noncoding1 120 120 98.33 noncoding1.scaffold.1 279 50.1 1 SNP n A6G 1 . . 6 6 G 77 77 G 18 G 18 noncoding1:0:0:A6G:id3:variant in ref and reads so should report generic description of noncoding1
+ariba_noncoding_ref2 noncoding_ref2 0 0 19 78 noncoding2 120 120 98.33 noncoding2.scaffold.1 279 10.0 1 SNP n A52T 1 A52T SNP 42 42 A 84 84 T 17 T,G 20,30 noncoding_ref2:0:0:A42T:id2:ref has wild type, reads have variant so should report generic description of noncoding1
+ariba_presence_absence1 presence_absence1 1 0 27 88 presence_absence1 96 96 98.96 presence_absence1.scaffold.1 267 51.1 0 SNP p A10V . A10V NONSYN 27 29 GCA 112 114 GTA 29;30;31 G;T;A 29;30;31 presence_absence1:1:0:A10V:.:Ref has wild, reads have variant so report Generic description of presence_absence1
+ariba_variants_only1 variants_only1 1 1 64 12 variants_only1 90 . . . . . . . . . . . . . . . . . . . . . . .
diff --git a/ariba/tests/read_filter_test.py b/ariba/tests/read_filter_test.py
index c705835..75aa870 100644
--- a/ariba/tests/read_filter_test.py
+++ b/ariba/tests/read_filter_test.py
@@ -13,7 +13,9 @@ class TestReadFilter(unittest.TestCase):
self.external_progs = external_progs.ExternalProgs()
- def test_run_cdhit_est_2d(self):
+ # skip this, as no longer using cdhit2d, but leave it here in case we want
+ # to put it back in at a later date
+ def _test_run_cdhit_est_2d(self):
'''test _run_cdhit_est_2d'''
reads_in = os.path.join(data_dir, 'read_filter_test_run_cdhit_est_2d.reads.in.fa')
ref_in = os.path.join(data_dir, 'read_filter_test_run_cdhit_est_2d.ref.in.fa')
@@ -33,7 +35,9 @@ class TestReadFilter(unittest.TestCase):
self.assertEqual(expected, got)
- def test_run(self):
+ # skip this, as no longer using cdhit2d, but leave it here in case we want
+ # to put it back in at a later date
+ def _test_run(self):
'''test run'''
rstore_infile = os.path.join(data_dir, 'read_filter_test_run.in.read_store')
ref_fasta = os.path.join(data_dir, 'read_filter_test_run.in.ref.fa')
diff --git a/ariba/tests/ref_preparer_test.py b/ariba/tests/ref_preparer_test.py
index 00b979d..0e25a4e 100644
--- a/ariba/tests/ref_preparer_test.py
+++ b/ariba/tests/ref_preparer_test.py
@@ -132,6 +132,7 @@ class TestRefPreparer(unittest.TestCase):
got = os.path.join(tmp_out, filename)
self.assertTrue(filecmp.cmp(expected, got, shallow=False))
+ self.assertTrue(os.path.exists(os.path.join(tmp_out, '02.cdhit.all.fa.msh')))
shutil.rmtree(tmp_out)
diff --git a/ariba/tests/reference_data_test.py b/ariba/tests/reference_data_test.py
index ae723b2..02f74bd 100644
--- a/ariba/tests/reference_data_test.py
+++ b/ariba/tests/reference_data_test.py
@@ -52,6 +52,23 @@ class TestReferenceData(unittest.TestCase):
}
self.assertEqual(expected_seqs_dict, ref_data.sequences)
+ self.assertEqual({}, ref_data.ariba_to_original_name)
+
+ rename_file = os.path.join(data_dir, 'reference_data_init_ok.rename.tsv')
+ ref_data = reference_data.ReferenceData([fasta_in], [tsv_in], rename_file=rename_file)
+ expected_rename_dict = {'gene1': 'original_gene1', 'gene2': 'original_gene2'}
+ self.assertEqual(expected_rename_dict, ref_data.ariba_to_original_name)
+
+
+ def test_load_rename_file(self):
+ '''Test _load_rename_file'''
+ infile = os.path.join(data_dir, 'reference_data_load_rename_file.tsv')
+ got = reference_data.ReferenceData._load_rename_file(infile)
+ expected = {
+ 'ariba1': 'original1',
+ 'ariba2': 'original2'
+ }
+ self.assertEqual(expected, got)
def test_load_metadata_tsv(self):
diff --git a/ariba/tests/report_filter_test.py b/ariba/tests/report_filter_test.py
index 63c94d8..31ff036 100644
--- a/ariba/tests/report_filter_test.py
+++ b/ariba/tests/report_filter_test.py
@@ -12,10 +12,10 @@ class TestReportFilter(unittest.TestCase):
'''test __init__ on good input file'''
infile = os.path.join(data_dir, 'report_filter_test_init_good.tsv')
rf = report_filter.ReportFilter(infile=infile)
- line1 = '\t'.join(['cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.1', '1300', '10.5', '1', 'SNP', 'n', 'C42T', '0', '.', '.', '42', '42', 'C', '142', '142', 'C', '500', '.', '500', 'a:n:C42T:id1:foo', 'free_text'])
- line2 = '\t'.join(['cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.1', '1300', '10.5', '1', 'SNP', 'n', 'A51G', '0', '.', '.', '51', '51', 'C', '151', '151', 'C', '542', '.', '542', 'a:n:A51G:id2:bar', 'free_text2'])
- line3 = '\t'.join(['cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.2', '1300', '12.4', '1', 'SNP', 'n', 'A51G', '0', '.', '.', '51', '51', 'C', '151', '151', 'C', '542', '.', '542', 'a:n:A51G:id3:spam', 'free_text3'])
- line4 = '\t'.join(['cluster2', '1', '0', '179', '20000', 'cluster2', '1042', '1042', '42.42', 'cluster2.scaffold.1', '1442', '20.2', '1', 'SNP', 'p', 'I42L', '1', 'I42L', 'NONSYN', '112', '112', 'C', '442', '442', 'T', '300', '.', '290', 'a:v:I42L:id4:eggs', 'free_text3'])
+ line1 = '\t'.join(['ariba_cluster1', 'cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.1', '1300', '10.5', '1', 'SNP', 'n', 'C42T', '0', '.', '.', '42', '42', 'C', '142', '142', 'C', '500', 'C', '500', 'a:n:C42T:id1:foo', 'free_text'])
+ line2 = '\t'.join(['ariba_cluster1', 'cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.1', '1300', '10.5', '1', 'SNP', 'n', 'A51G', '0', '.', '.', '51', '51', 'C', '151', '151', 'C', '542', 'C', '542', 'a:n:A51G:id2:bar', 'free_text2'])
+ line3 = '\t'.join(['ariba_cluster1', 'cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.2', '1300', '12.4', '1', 'SNP', 'n', 'A51G', '0', '.', '.', '51', '51', 'C', '151', '151', 'C', '542', 'C', '542', 'a:n:A51G:id3:spam', 'free_text3'])
+ line4 = '\t'.join(['ariba_cluster2', 'cluster2', '1', '0', '179', '20000', 'cluster2', '1042', '1042', '42.42', 'cluster2.scaffold.1', '1442', '20.2', '1', 'SNP', 'p', 'I42L', '1', 'I42L', 'NONSYN', '112', '112', 'C', '442', '442', 'T', '300', 'T', '290', 'a:v:I42L:id4:eggs', 'free_text3'])
expected = {
'cluster1': {
@@ -38,8 +38,9 @@ class TestReportFilter(unittest.TestCase):
def test_report_line_to_dict(self):
- line = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t99.42\tcluster1.scaffold.1\t999\t23.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\ta:n:C42T:id1:foo\tfree text'
+ line = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t99.42\tcluster1.scaffold.1\t999\t23.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\ta:n:C42T:id1:foo\tfree text'
expected = {
+ 'ariba_ref_name': 'ariba_cluster1',
'ref_name': 'cluster1',
'gene': '0',
'var_only': '0',
@@ -66,8 +67,8 @@ class TestReportFilter(unittest.TestCase):
'ctg_end': 142,
'ctg_nt': 'C',
'smtls_total_depth': '500',
- 'smtls_alt_nt': '.',
- 'smtls_alt_depth': '500',
+ 'smtls_nts': '.',
+ 'smtls_nts_depth': '500',
'var_description': 'a:n:C42T:id1:foo',
'free_text': 'free text',
}
@@ -81,6 +82,7 @@ class TestReportFilter(unittest.TestCase):
def test_dict_to_report_line(self):
'''Test _dict_to_report_line'''
report_dict = {
+ 'ariba_ref_name': 'ariba_cluster1',
'ref_name': 'cluster1',
'gene': '0',
'var_only': '0',
@@ -107,13 +109,13 @@ class TestReportFilter(unittest.TestCase):
'ctg_end': 142,
'ctg_nt': 'C',
'smtls_total_depth': '500',
- 'smtls_alt_nt': '.',
- 'smtls_alt_depth': '500',
+ 'smtls_nts': '.',
+ 'smtls_nts_depth': '500',
'var_description': 'a:n:C42T:id1:foo',
'free_text': 'free text',
}
- expected = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t99.42\tcluster1.scaffold.1\t1300\t42.4\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\ta:n:C42T:id1:foo\tfree text'
+ expected = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t99.42\tcluster1.scaffold.1\t1300\t42.4\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\ta:n:C42T:id1:foo\tfree text'
self.assertEqual(expected, report_filter.ReportFilter._dict_to_report_line(report_dict))
@@ -121,10 +123,10 @@ class TestReportFilter(unittest.TestCase):
good_infile = os.path.join(data_dir, 'report_filter_test_load_report_good.tsv')
bad_infile = os.path.join(data_dir, 'report_filter_test_load_report_bad.tsv')
- line1 = '\t'.join(['cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.1', '1300', '12.2', '1', 'SNP', 'n', 'C42T', '0', '.', '.', '42', '42', 'C', '142', '142', 'C', '500', '.', '500', 'a:n:C42T:id1:foo', 'free_text'])
- line2 = '\t'.join(['cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.1', '1300', '12.2', '1', 'SNP', 'n', 'A51G', '0', '.', '.', '51', '51', 'C', '151', '151', 'C', '542', '.', '542', 'a:n:A51G:id2:bar', 'free_text2'])
- line3 = '\t'.join(['cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.2', '1300', '22.2', '1', 'SNP', 'n', 'A51G', '0', '.', '.', '51', '51', 'C', '151', '151', 'C', '542', '.', '542', 'a:n:A51G:id3:spam', 'free_text3'])
- line4 = '\t'.join(['cluster2', '1', '1', '179', '20000', 'cluster2', '1042', '1042', '42.42', 'cluster2.scaffold.1', '1442', '33.3', '1', 'SNP', 'p', 'I42L', '1', 'I42L', 'NONSYN', '112', '112', 'C', '442', '442', 'T', '300', '.', '290', 'a:v:I42L:id4:eggs', 'free_text3'])
+ line1 = '\t'.join(['ariba_cluster1', 'cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.1', '1300', '12.2', '1', 'SNP', 'n', 'C42T', '0', '.', '.', '42', '42', 'C', '142', '142', 'C', '500', 'C', '500', 'a:n:C42T:id1:foo', 'free_text'])
+ line2 = '\t'.join(['ariba_cluster1', 'cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.1', '1300', '12.2', '1', 'SNP', 'n', 'A51G', '0', '.', '.', '51', '51', 'C', '151', '151', 'C', '542', 'C', '542', 'a:n:A51G:id2:bar', 'free_text2'])
+ line3 = '\t'.join(['ariba_cluster1', 'cluster1', '0', '0', '27', '10000', 'cluster1', '1000', '999', '99.42', 'cluster1.scaffold.2', '1300', '22.2', '1', 'SNP', 'n', 'A51G', '0', '.', '.', '51', '51', 'C', '151', '151', 'C', '542', 'C', '542', 'a:n:A51G:id3:spam', 'free_text3'])
+ line4 = '\t'.join(['ariba_cluster2', 'cluster2', '1', '1', '179', '20000', 'cluster2', '1042', '1042', '42.42', 'cluster2.scaffold.1', '1442', '33.3', '1', 'SNP', 'p', 'I42L', '1', 'I42L', 'NONSYN', '112', '112', 'C', '442', '442', 'T', '300', 'T', '290', 'a:v:I42L:id4:eggs', 'free_text3'])
expected = {
'cluster1': {
@@ -181,9 +183,9 @@ class TestReportFilter(unittest.TestCase):
def test_report_dict_passes_essential_filters(self):
'''Test _report_dict_passes_essential_filters'''
- line1 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
- line2 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t0\t98.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
- line3 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
+ line1 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
+ line2 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t0\t98.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
+ line3 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
tests = [
(report_filter.ReportFilter._report_line_to_dict(line1), True),
(report_filter.ReportFilter._report_line_to_dict(line2), False),
@@ -214,8 +216,8 @@ class TestReportFilter(unittest.TestCase):
def test_filter_list_of_dicts_all_fail(self):
'''Test _filter_list_of_dicts where all fail'''
rf = report_filter.ReportFilter()
- line1 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t88.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
- line2 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
+ line1 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t88.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
+ line2 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
dict1 = report_filter.ReportFilter._report_line_to_dict(line1)
dict2 = report_filter.ReportFilter._report_line_to_dict(line2)
got = rf._filter_list_of_dicts([dict1, dict2])
@@ -225,11 +227,11 @@ class TestReportFilter(unittest.TestCase):
def test_filter_list_of_dicts_with_essential(self):
'''Test _filter_list_of_dicts with an essential line but all others fail'''
rf = report_filter.ReportFilter(ignore_not_has_known_variant=True)
- line1 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
- line2 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
+ line1 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
+ line2 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t400\t12.2\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
dict1 = report_filter.ReportFilter._report_line_to_dict(line1)
dict2 = report_filter.ReportFilter._report_line_to_dict(line2)
- expected_line = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t400\t12.2\t' + '\t'.join(['.'] * 17) + '\tfree text'
+ expected_line = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t400\t12.2\t' + '\t'.join(['.'] * 17) + '\tfree text'
expected = [report_filter.ReportFilter._report_line_to_dict(expected_line)]
assert expected != [None]
got = rf._filter_list_of_dicts([dict1, dict2])
@@ -239,9 +241,9 @@ class TestReportFilter(unittest.TestCase):
def test_filter_list_of_dicts_with_pass(self):
'''Test _filter_list_of_dicts with a line that passes'''
rf = report_filter.ReportFilter(ignore_not_has_known_variant=True)
- line1 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
- line2 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC46T\t1\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C46T\tfree text'
- line3 = 'cluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
+ line1 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
+ line2 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t98.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC46T\t1\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C46T\tfree text'
+ line3 = 'ariba_cluster1\tcluster1\t0\t0\t27\t10000\tcluster1\t1000\t999\t78.42\tcluster1.scaffold.1\t500\t12.1\t1\tSNP\tn\tC42T\t0\t.\t.\t42\t42\tC\t142\t142\tC\t500\t.\t500\tDescription_of_variant C42T\tfree text'
dict1 = report_filter.ReportFilter._report_line_to_dict(line1)
dict2 = report_filter.ReportFilter._report_line_to_dict(line2)
dict3 = report_filter.ReportFilter._report_line_to_dict(line3)
@@ -252,9 +254,9 @@ class TestReportFilter(unittest.TestCase):
def test_remove_all_after_first_frameshift(self):
'''Test _remove_all_after_first_frameshift'''
self.assertEqual([], report_filter.ReportFilter._remove_all_after_first_frameshift([]))
- line1 = 'cluster1\t1\t0\t528\t1874\tcluster1\t1188\t1097\t92.43\tcluster1.scaffold.1\t2218\t42.42\t0\t.\tp\t.\t0\tE89G\tNONSYN\t65\t265\tA;A\t766\t766\tG;C\t88;90\t.;.\t87;90\t.\t.'
- line2 = 'cluster1\t1\t0\t528\t1874\tcluster1\t1188\t1097\t92.43\tcluster1.scaffold.1\t2218\t42.42\t0\t.\tp\t.\t0\tQ37fs\tFSHIFT\t109\t109\tA\t634\t634\t.\t67\t.\t67\t.\t.'
- line3 = 'cluster1\t1\t0\t528\t1874\tcluster1\t1188\t1097\t92.43\tcluster1.scaffold.1\t2218\t42.42\t0\t.\tp\t.\t0\tE89G\tNONSYN\t265\t265\tA;A\t766\t766\tG;C\t88;90\t.;.\t87;90\t.\t.'
+ line1 = 'ariba_cluster1\tcluster1\t1\t0\t528\t1874\tcluster1\t1188\t1097\t92.43\tcluster1.scaffold.1\t2218\t42.42\t0\t.\tp\t.\t0\tE89G\tNONSYN\t65\t265\tA;A\t766\t766\tG;C\t88;90\t.;.\t87;90\t.\t.'
+ line2 = 'ariba_cluster1\tcluster1\t1\t0\t528\t1874\tcluster1\t1188\t1097\t92.43\tcluster1.scaffold.1\t2218\t42.42\t0\t.\tp\t.\t0\tQ37fs\tFSHIFT\t109\t109\tA\t634\t634\t.\t67\t.\t67\t.\t.'
+ line3 = 'ariba_cluster1\tcluster1\t1\t0\t528\t1874\tcluster1\t1188\t1097\t92.43\tcluster1.scaffold.1\t2218\t42.42\t0\t.\tp\t.\t0\tE89G\tNONSYN\t265\t265\tA;A\t766\t766\tG;C\t88;90\t.;.\t87;90\t.\t.'
dict1 = report_filter.ReportFilter._report_line_to_dict(line1)
dict2 = report_filter.ReportFilter._report_line_to_dict(line2)
dict3 = report_filter.ReportFilter._report_line_to_dict(line3)
diff --git a/ariba/tests/samtools_variants_test.py b/ariba/tests/samtools_variants_test.py
index ce8f855..e521c87 100644
--- a/ariba/tests/samtools_variants_test.py
+++ b/ariba/tests/samtools_variants_test.py
@@ -78,10 +78,10 @@ class TestSamtoolsVariants(unittest.TestCase):
tests = [
( ('ref1', 42), None ),
( ('ref2', 1), None ),
- ( ('ref1', 0), ('G', '.', 1, '1') ),
- ( ('ref1', 2), ('T', 'A', 3, '2,1') ),
- ( ('ref1', 3), ('C', 'A,G', 42, '21,11,10') ),
- ( ('ref1', 4), ('C', 'AC', 41, '0,42') )
+ ( ('ref1', 0), ('G', 1, '1') ),
+ ( ('ref1', 2), ('T,A', 3, '2,1') ),
+ ( ('ref1', 3), ('C,A,G', 42, '21,11,10') ),
+ ( ('ref1', 4), ('C,AC', 41, '0,42') )
]
for (name, position), expected in tests:
@@ -113,12 +113,12 @@ class TestSamtoolsVariants(unittest.TestCase):
]
expected = {
'16__cat_2_M35190.scaffold.1': {
- 92: ('T', 'A', 123, '65,58'),
- 179: ('A', 'T', 86, '41,45'),
- 263: ('G', 'C', 97, '53,44'),
+ 92: ('T,A',123, '65,58'),
+ 179: ('A,T', 86, '41,45'),
+ 263: ('G,C', 97, '53,44'),
},
'16__cat_2_M35190.scaffold.6': {
- 93: ('T', 'G', 99, '56,43')
+ 93: ('T,G', 99, '56,43')
}
}
@@ -159,9 +159,9 @@ class TestSamtoolsVariants(unittest.TestCase):
)
samtools_vars.run()
tests = [
- (('ref', 425), ('C', 'T', 31, '18,13')),
- (('not_a_ref', 10), ('ND', 'ND', 'ND', 'ND')),
- (('ref', 1000000000), ('ND', 'ND', 'ND', 'ND'))
+ (('ref', 425), ('C,T', 31, '18,13')),
+ (('not_a_ref', 10), ('ND', 'ND', 'ND')),
+ (('ref', 1000000000), ('ND', 'ND', 'ND'))
]
for (ref, pos), expected in tests:
got = samtools_vars.get_depths_at_position(ref, pos)
diff --git a/ariba/tests/summary_cluster_test.py b/ariba/tests/summary_cluster_test.py
index f5022fc..2cf8f19 100644
--- a/ariba/tests/summary_cluster_test.py
+++ b/ariba/tests/summary_cluster_test.py
@@ -8,9 +8,10 @@ data_dir = os.path.join(modules_dir, 'tests', 'data')
class TestSummaryCluster(unittest.TestCase):
def test_line2dict(self):
'''Test _line2dict'''
- line = 'refname\t1\t0\t19\t78\tcluster\t120\t120\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:var_group1:ref has wild type, foo bar\tsome free text'
+ line = 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t120\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:var_group1:ref has wild type, foo bar\tsome free text'
expected = {
+ 'ariba_ref_name': 'ariba_refname',
'ref_name': 'refname',
'gene': '1',
'var_only' : '0',
@@ -37,8 +38,8 @@ class TestSummaryCluster(unittest.TestCase):
'ctg_end': 84,
'ctg_nt': 'T',
'smtls_total_depth': '17',
- 'smtls_alt_nt': '.',
- 'smtls_alt_depth': '17',
+ 'smtls_nts': 'T',
+ 'smtls_nts_depth': '17',
'var_description': 'noncoding1:1:0:A14T:var_group1:ref has wild type, foo bar',
'var_group': 'var_group1',
'free_text': 'some free text'
@@ -51,9 +52,9 @@ class TestSummaryCluster(unittest.TestCase):
'''Test add_data_dict'''
cluster = summary_cluster.SummaryCluster()
self.assertTrue(cluster.name is None)
- line1 = 'refname\t1\t0\t19\t78\tcluster\t120\t120\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
- line2 = 'refname\t1\t0\t19\t78\tcluster2\t120\t120\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id2:ref has wild type, foo bar\tsome free text'
- line3 = 'refname2\t1\t0\t19\t78\tcluster\t120\t120\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id3:ref has wild type, foo bar\tsome free text'
+ line1 = 'ariba_refname1\trefname\t1\t0\t19\t78\tcluster\t120\t120\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
+ line2 = 'ariba_refname1\trefname\t1\t0\t19\t78\tcluster2\t120\t120\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id2:ref has wild type, foo bar\tsome free text'
+ line3 = 'ariba_refname2\trefname2\t1\t0\t19\t78\tcluster\t120\t120\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id3:ref has wild type, foo bar\tsome free text'
data_dict1 = summary_cluster.SummaryCluster.line2dict(line1)
data_dict2 = summary_cluster.SummaryCluster.line2dict(line2)
data_dict3 = summary_cluster.SummaryCluster.line2dict(line3)
@@ -67,13 +68,30 @@ class TestSummaryCluster(unittest.TestCase):
cluster.add_data_dict(data_dict3)
+ def test_has_any_part_of_ref_assembled(self):
+ '''Test _has_any_part_of_ref_assembled'''
+ line1 = 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t.\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
+ line2 = 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t0\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
+ line3 = 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
+ data_dict1 = summary_cluster.SummaryCluster.line2dict(line1)
+ data_dict2 = summary_cluster.SummaryCluster.line2dict(line2)
+ data_dict3 = summary_cluster.SummaryCluster.line2dict(line3)
+ cluster = summary_cluster.SummaryCluster()
+ cluster.add_data_dict(data_dict1)
+ self.assertFalse(cluster._has_any_part_of_ref_assembled())
+ cluster.add_data_dict(data_dict2)
+ self.assertFalse(cluster._has_any_part_of_ref_assembled())
+ cluster.add_data_dict(data_dict3)
+ self.assertTrue(cluster._has_any_part_of_ref_assembled())
+
+
def test_pc_id_of_longest(self):
'''Test pc_id_of_longest'''
cluster = summary_cluster.SummaryCluster()
self.assertTrue(cluster.name is None)
- line1 = 'refname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
- line2 = 'refname\t1\t0\t19\t78\tcluster\t120\t119\t98.20\tctg_name2\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
- line3 = 'refname\t1\t0\t19\t78\tcluster\t120\t114\t98.32\tctg_name3\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
+ line1 = 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
+ line2 = 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t119\t98.20\tctg_name2\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
+ line3 = 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t114\t98.32\tctg_name3\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text'
data_dict1 = summary_cluster.SummaryCluster.line2dict(line1)
data_dict2 = summary_cluster.SummaryCluster.line2dict(line2)
data_dict3 = summary_cluster.SummaryCluster.line2dict(line3)
@@ -85,11 +103,11 @@ class TestSummaryCluster(unittest.TestCase):
def test_to_cluster_summary_number(self):
'''Test _to_cluster_summary_assembled'''
- line = 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text'
+ line = 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text'
data_dict = summary_cluster.SummaryCluster.line2dict(line)
tests = [
- ('0', 0, 'no'),
+ ('0', 0, 'partial'),
('0', 64, 'no'),
('0', 1024, 'no'),
('0', 1, 'fragmented'),
@@ -99,7 +117,7 @@ class TestSummaryCluster(unittest.TestCase):
('0', 51, 'yes_nonunique'),
('0', 147, 'yes_nonunique'),
('0', 275, 'yes_nonunique'),
- ('1', 0, 'no'),
+ ('1', 0, 'partial'),
('1', 64, 'no'),
('1', 1024, 'no'),
('1', 1, 'fragmented'),
@@ -117,20 +135,26 @@ class TestSummaryCluster(unittest.TestCase):
data_dict['flag'] = flag.Flag(f)
cluster.add_data_dict(data_dict)
self.assertEqual(expected, cluster._to_cluster_summary_assembled())
+ if expected == 'partial':
+ original_number = cluster.data[0]['ref_base_assembled']
+ cluster.data[0]['ref_base_assembled'] = 0
+ self.assertEqual('no', cluster._to_cluster_summary_assembled())
+ cluster.data[0]['ref_base_assembled'] = original_number
def test_has_known_variant(self):
'''Test _has_known_variant'''
lines = [
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
- 'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.'
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+ 'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+ 'ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t0\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.',
]
dicts = [summary_cluster.SummaryCluster.line2dict(x) for x in lines]
- expected = [True, False, False, False, False]
+ expected = ['yes', 'no', 'no', 'no', 'no', 'het']
assert len(dicts) == len(expected)
for i in range(len(dicts)):
@@ -139,14 +163,15 @@ class TestSummaryCluster(unittest.TestCase):
def test_has_any_known_variant(self):
lines = [
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
- 'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.'
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+ 'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+ 'ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t0\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.',
]
- expected = ['yes', 'no', 'no', 'no', 'no']
+ expected = ['yes', 'no', 'no', 'no', 'no', 'het']
assert len(lines) == len(expected)
for i in range(len(lines)):
@@ -159,16 +184,18 @@ class TestSummaryCluster(unittest.TestCase):
def test_has_nonsynonymous(self):
'''Test _has_nonsynonymous'''
lines = [
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
- 'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.'
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+ 'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+ 'ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t0\tHET\t.\t.\t.\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t.\t.',
+ 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA62T\t1\tA62T\tSNP\t62\t62\tA\t84\t84\tA\t40\tA,T\t10,30\tnon_coding1:0:0:A62T:id2:foo_bar\tspam eggs',
]
dicts = [summary_cluster.SummaryCluster.line2dict(x) for x in lines]
- expected = [False, True, False, True, True, True]
+ expected = ['no', 'yes', 'no', 'yes', 'yes', 'yes', 'het', 'het']
assert len(dicts) == len(expected)
for i in range(len(dicts)):
@@ -178,14 +205,16 @@ class TestSummaryCluster(unittest.TestCase):
def test_has_any_nonsynonymous(self):
'''Test _has_any_nonsynonymous'''
lines = [
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:N_ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\t.\tMULTIPLE\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:N_ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\t.\tMULTIPLE\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t0\tHET\t.\t.\t.\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t.\t.',
+ 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA62T\t1\tA62T\tSNP\t62\t62\tA\t84\t84\tA\t40\tA,T\t10,30\tnon_coding1:0:0:A62T:id2:foo_bar\tspam eggs',
]
- expected = ['no', 'yes', 'no', 'yes', 'yes']
+ expected = ['no', 'yes', 'no', 'yes', 'yes', 'het', 'het']
assert len(lines) == len(expected)
for i in range(len(lines)):
@@ -198,32 +227,36 @@ class TestSummaryCluster(unittest.TestCase):
def test_has_novel_nonsynonymous(self):
'''Test _has_novel_nonsynonymous'''
lines = [
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
- 'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.'
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+ 'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+ 'ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t0\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.',
+ 'ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t0\tHET\t.\t.\t.\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t.\t.',
]
dicts = [summary_cluster.SummaryCluster.line2dict(x) for x in lines]
- expected = [False, False, True, True, True]
+ expected = ['no', 'no', 'yes', 'yes', 'yes', 'no', 'het']
assert len(dicts) == len(expected)
- for i in range(len(dicts)-1):
+ for i in range(len(dicts)):
self.assertEqual(expected[i], summary_cluster.SummaryCluster._has_novel_nonsynonymous(dicts[i]))
def test_has_any_novel_nonsynonymous(self):
'''Test _has_any_novel_nonsynonymous'''
lines = [
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
- 'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.'
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\t.\tsome free text',
+ 'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+ 'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
+ 'ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t0\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.',
+ 'ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t0\tHET\t.\t.\t.\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t.\t.',
]
- expected = ['no', 'no', 'yes', 'yes', 'yes']
+ expected = ['no', 'no', 'yes', 'yes', 'yes', 'no', 'het']
assert len(lines) == len(expected)
for i in range(len(lines)):
@@ -236,11 +269,11 @@ class TestSummaryCluster(unittest.TestCase):
def test_to_cluster_summary_has_known_nonsynonymous(self):
'''Test _to_cluster_summary_has_known_nonsynonymous'''
lines = [
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\t.\tn\t.\t.\t.\tMULTIPLE\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\t.\tn\t.\t.\t.\tMULTIPLE\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
]
expected = ['yes', 'yes', 'no', 'no', 'no']
@@ -257,11 +290,11 @@ class TestSummaryCluster(unittest.TestCase):
def test_to_cluster_summary_has_novel_nonsynonymous(self):
'''Test _to_cluster_summary_has_novel_nonsynonymous'''
lines = [
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\t.\tn\t.\t.\t.\tMULTIPLE\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\t.\tn\t.\t.\t.\tMULTIPLE\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
]
expected = ['no', 'no', 'no', 'yes', 'yes']
@@ -278,11 +311,11 @@ class TestSummaryCluster(unittest.TestCase):
def test_to_cluster_summary_has_nonsynonymous(self):
'''Test _to_cluster_summary_has_nonsynonymous'''
lines = [
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\t.\tn\t.\t.\t.\tMULTIPLE\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\t.\tn\t.\t.\t.\tMULTIPLE\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
]
expected = ['no', 'yes', 'no', 'yes', 'yes']
@@ -309,16 +342,16 @@ class TestSummaryCluster(unittest.TestCase):
self.assertEqual(None, summary_cluster.SummaryCluster._get_known_noncoding_het_snp(d))
d['ref_ctg_effect'] = 'SNP'
- d['smtls_alt_nt'] = '.'
+ d['smtls_nts'] = '.'
self.assertEqual(None, summary_cluster.SummaryCluster._get_known_noncoding_het_snp(d))
- d['smtls_alt_nt'] = 'A;G;T'
+ d['smtls_nts'] = 'A;G;T'
self.assertEqual(None, summary_cluster.SummaryCluster._get_known_noncoding_het_snp(d))
d['known_var_change'] = 'A42T'
d['ctg_nt'] = 'A'
- d['smtls_alt_nt'] = 'T'
- d['smtls_alt_depth'] = '52,48'
+ d['smtls_nts'] = 'A,T'
+ d['smtls_nts_depth'] = '52,48'
self.assertEqual(('A42T', 48.0), summary_cluster.SummaryCluster._get_known_noncoding_het_snp(d))
@@ -326,6 +359,7 @@ class TestSummaryCluster(unittest.TestCase):
'''Test _get_nonsynonymous_var'''
d = {
'ref_name': 'ref',
+ 'gene': '1',
'var_type': '.',
'known_var_change': '.',
'has_known_var': '.',
@@ -341,7 +375,7 @@ class TestSummaryCluster(unittest.TestCase):
d['var_type'] = 'p'
d['known_var'] = '1'
d['has_known_var'] = '1'
- with self.assertRaises(summary_cluster.Error):
+ with self.assertRaises(summary_cluster_variant.Error):
summary_cluster.SummaryCluster._get_nonsynonymous_var(d)
d['known_var_change'] = 'I42L'
@@ -352,14 +386,14 @@ class TestSummaryCluster(unittest.TestCase):
d['var_group'] = '.'
d['ref_ctg_change'] = 'P43Q'
- with self.assertRaises(summary_cluster.Error):
+ with self.assertRaises(summary_cluster_variant.Error):
summary_cluster.SummaryCluster._get_nonsynonymous_var(d)
d['known_var_change'] = '.'
self.assertEqual(('ref', 'P43Q', 'novel', None), summary_cluster.SummaryCluster._get_nonsynonymous_var(d))
d['ref_ctg_change'] = '.'
- with self.assertRaises(summary_cluster.Error):
+ with self.assertRaises(summary_cluster_variant.Error):
summary_cluster.SummaryCluster._get_nonsynonymous_var(d)
d['ref_ctg_effect'] = 'MULTIPLE'
@@ -369,16 +403,16 @@ class TestSummaryCluster(unittest.TestCase):
def test_has_match(self):
'''Test _has_match'''
lines = [
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:1:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:1:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:1:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:1:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:1:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:1:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:1:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:1:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:1:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:1:A14T:id1:ref has wild type, foo bar\tsome free text',
]
expected = ['yes', 'yes', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'no', 'no']
@@ -396,14 +430,14 @@ class TestSummaryCluster(unittest.TestCase):
def test_has_var_groups(self):
'''Test has_var_groups'''
lines = [
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id2:ref has wild type, foo bar\tsome free text',
- 'refname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id3:ref has wild type, foo bar\tsome free text',
- 'refname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id4:ref has wild type, foo bar\tsome free text',
- 'refname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:0:A14T:id5:ref has wild type, foo bar\tsome free text',
- 'refname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:1:A14T:id6:ref has wild type, foo bar\tsome free text',
- 'refname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:1:A14T:id7:ref has wild type, foo bar\tsome free text',
- 'refname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:1:1:A14T:id7:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id2:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id3:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id4:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t1\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:0:A14T:id5:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:1:A14T:id6:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tp\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:1:A14T:id7:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t1\t1\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tp\tA14T\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:1:1:A14T:id7:ref has wild type, foo bar\tsome free text',
]
dicts = [summary_cluster.SummaryCluster.line2dict(line) for line in lines]
cluster = summary_cluster.SummaryCluster()
@@ -416,8 +450,8 @@ class TestSummaryCluster(unittest.TestCase):
def test_column_summary_data(self):
'''Test column_summary_data'''
- line1 = 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnon_coding1:n:A14T:id1:foo_bar\tspam eggs'
- line2 = 'ref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text'
+ line1 = 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:n:A14T:id1:foo_bar\tspam eggs'
+ line2 = 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text'
data_dict1 = summary_cluster.SummaryCluster.line2dict(line1)
data_dict2 = summary_cluster.SummaryCluster.line2dict(line2)
@@ -438,8 +472,8 @@ class TestSummaryCluster(unittest.TestCase):
def test_non_synon_variants(self):
'''Test non_synon_variants'''
- line1 = 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs'
- line2 = 'ref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text'
+ line1 = 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs'
+ line2 = 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text'
data_dict1 = summary_cluster.SummaryCluster.line2dict(line1)
data_dict2 = summary_cluster.SummaryCluster.line2dict(line2)
@@ -454,10 +488,10 @@ class TestSummaryCluster(unittest.TestCase):
def test_known_noncoding_het_snps(self):
'''test known_noncoding_het_snps'''
lines = [
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA42T\t1\tA42T\tSNP\t42\t42\tA\t84\t84\tT\t40\tA\t10,30\tnon_coding1:0:0:A42T:id1:foo_bar\tspam eggs',
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA62T\t1\tA62T\tSNP\t62\t62\tA\t84\t84\tA\t40\tT\t10,30\tnon_coding1:0:0:A62T:id2:foo_bar\tspam eggs',
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tT,G\t10,40,50\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs'
+ 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
+ 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA42T\t1\tA42T\tSNP\t42\t42\tA\t84\t84\tT\t40\tT,A\t10,30\tnon_coding1:0:0:A42T:id1:foo_bar\tspam eggs',
+ 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA62T\t1\tA62T\tSNP\t62\t62\tA\t84\t84\tA\t40\tA,T\t10,30\tnon_coding1:0:0:A62T:id2:foo_bar\tspam eggs',
+ 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tA,T,G\t10,40,50\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs'
]
cluster = summary_cluster.SummaryCluster()
@@ -466,7 +500,7 @@ class TestSummaryCluster(unittest.TestCase):
got = cluster.known_noncoding_het_snps()
expected = {
'.': {'A82T': 40.0},
- 'id1': {'A42T': 25.0},
+ 'id1': {'A42T': 25.0, 'A14T': 100.0},
'id2': {'A62T': 75.0},
}
self.assertEqual(expected, got)
@@ -475,10 +509,10 @@ class TestSummaryCluster(unittest.TestCase):
def test_get_all_nonsynon_variants_set(self):
'''test _get_all_nonsynon_variants_set'''
lines = [
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text',
- 'ref1\t1\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tI14L\t1\tI14L\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnon_coding1:0:0:I14L:.:foo_bar\tspam eggs',
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tA\t10,30\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tA,G\t20,10,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
+ 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text',
+ 'ariba_ref1\tref1\t1\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tI14L\t1\tI14L\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:I14L:.:foo_bar\tspam eggs',
+ 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A\t10,30\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
+ 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A,G\t20,10,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
]
data_dicts = [summary_cluster.SummaryCluster.line2dict(x) for x in lines]
@@ -492,10 +526,10 @@ class TestSummaryCluster(unittest.TestCase):
def test_gather_data(self):
'''test gather_data'''
lines = [
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text',
- 'ref1\t1\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tI14L\t1\tI14L\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnon_coding1:0:0:I14L:.:foo_bar\tspam eggs',
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tA\t10,30\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tA,G\t20,10,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
+ 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t95\t98.42\tctg_name\t279\t24.4\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\t.\tsome free text',
+ 'ariba_ref1\tref1\t1\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tI14L\t1\tI14L\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:I14L:.:foo_bar\tspam eggs',
+ 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A\t10,30\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
+ 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A,G\t20,10,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
]
data_dicts = [summary_cluster.SummaryCluster.line2dict(x) for x in lines]
diff --git a/ariba/tests/summary_cluster_variant_test.py b/ariba/tests/summary_cluster_variant_test.py
index d2e8377..f88cc0c 100644
--- a/ariba/tests/summary_cluster_variant_test.py
+++ b/ariba/tests/summary_cluster_variant_test.py
@@ -7,12 +7,12 @@ class TestSummaryClusterVariant(unittest.TestCase):
def test_has_nonsynonymous(self):
'''Test _has_nonsynonymous'''
lines = [
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
- 'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.',
- 'refname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\t.;.\t207;204\t.\t.'
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSYN\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t0\t0\t19\t78\tcluster\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tSNP\tn\t.\t.\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnoncoding1:0:0:A14T:id1:ref has wild type, foo bar\tsome free text',
+ 'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tMULTIPLE\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\tC;C\t207;204\t.\t.',
+ 'ariba_refname\trefname\t1\t0\t528\t2814\tcluster\t1188\t1009\t90.49\tctg_name\t2470\t141.8\t0\t.\tp\t.\t0\t.\tINDELS\t594\t594\tC;T\t1195\t1195\t.;C\t207;204\tC;C\t207;204\t.\t.'
]
dicts = [summary_cluster.SummaryCluster.line2dict(x) for x in lines]
@@ -48,51 +48,50 @@ class TestSummaryClusterVariant(unittest.TestCase):
def test_get_is_het_and_percent(self):
'''test _get_is_het_and_percent'''
- lines = [
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA42T\t1\tA42T\tSNP\t42\t42\tA\t84\t84\tT\t40\tA\t10,30\tnon_coding1:0:0:A42T:id1:foo_bar\tspam eggs',
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA62T\t1\tA62T\tSNP\t62\t62\tA\t84\t84\tA\t40\tT\t10,30\tnon_coding1:0:0:A62T:id2:foo_bar\tspam eggs',
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tT,G\t10,40,50\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs',
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tT\t95,5\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs',
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tT\t90,10\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs',
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tT,C\t90,6,4\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs',
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tT,C\t3,7,90\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs',
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tHET\t.\t.\t.\t.\t.\t.\t.\t.\t84\t84\tA\t50\tT\t40,10\t.\t.'
- ]
-
- expected = [
- (False, None),
- (True, 25.0),
- (True, 75.0),
- (True, 40.0),
- (False, 5.0),
- (True, 10.0),
- (True, 6.0),
- (True, 7.0),
- (True, 20.0)
+ tests = [
+ ('ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs', (False, 100.0)),
+ ('ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA42T\t1\tA42T\tSNP\t42\t42\tA\t84\t84\tT\t40\tT,A\t10,30\tnon_coding1:0:0:A42T:id1:foo_bar\tspam eggs', (True, 25.0)),
+ ('ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA62T\t1\tA62T\tSNP\t62\t62\tA\t84\t84\tA\t40\tA,T\t10,30\tnon_coding1:0:0:A62T:id2:foo_bar\tspam eggs', (True, 75.0)),
+ ('ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tA,T,G\t10,40,50\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs', (True, 40.0)),
+ ('ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tA,T\t95,5\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs', (False, 5.0)),
+ ('ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tA,T\t90,10\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs', (True, 10.0)),
+ ('ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tA,T,C\t90,6,4\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs', (True, 6.0)),
+ ('ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA82T\t1\tA82T\tSNP\t82\t82\tA\t84\t84\tA\t100\tA,T,C\t3,7,90\tnon_coding1:0:0:A82T:.:foo_bar\tspam eggs', (True, 7.0)),
+ ('ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t0\tHET\t.\t.\t.\t.\t.\t.\t.\t.\t84\t84\tA\t50\tA,T\t40,10\t.\t.', (True, 20.0)),
+ ('ariba_ref1\t23S.rDNA_WHO_F_01358c\t0\t1\t531\t9914\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3120\t744.8\t1\tSNP\tn\tC2597T\t1\tC2597T\tSNP\t2597\t2597\tC\t2755\t2755\tT\t823\tTC,T\t487,1\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T.\tHigh-level resistance to Azithromycin', (False, 100.0)),
+ ('ariba\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t0\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t90,10\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.', (True, 10.0)),
+ ('ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t0\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t91,9\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.', (False, 9.0)),
+ ('ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t0\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t50,50\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.', (True, 50.0)),
+ ('ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t0\t.\t.\t2597\t2597\tC\t2928\t2928\tC\t410\tC,T\t70,30\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.', (True, 30.0)),
+ ('ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t1\t.\t.\t2597\t2597\tC\t2928\t2928\tT\t410\tT,C\t91,9\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.', (False, 91.0)),
+ ('ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t1\t.\t.\t2597\t2597\tC\t2928\t2928\tT\t410\tT,C\t90,10\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.', (True, 90.0)),
+ ('ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t1\t.\t.\t2597\t2597\tC\t2928\t2928\tT\t410\tT,C\t50,50\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.', (True, 50.0)),
+ ('ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t1\t.\t.\t2597\t2597\tC\t2928\t2928\tT\t410\tT,C\t10,90\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.', (True, 10.0)),
+ ('ariba_23S.rDNA_WHO_F_01358c\t23S.rDNA_WHO_F_01358c\t0\t1\t659\t4168\t23S\t2890\t2890\t99.86\t23S.scaffold.1\t3628\t344.0\t1\tSNP\tn\tC2597T\t1\t.\t.\t2597\t2597\tC\t2928\t2928\tT\t410\tT,C\t9,91\t23S.rDNA_WHO_F_01358c:0:1:C2597T:.:E coli C2611T\t.', (True, 9.0)),
]
- assert len(lines) == len(expected)
- for i in range(len(lines)):
- data_dict = summary_cluster.SummaryCluster.line2dict(lines[i])
+ for line, expected in tests:
+ data_dict = summary_cluster.SummaryCluster.line2dict(line)
got = summary_cluster_variant.SummaryClusterVariant._get_is_het_and_percent(data_dict)
- self.assertEqual(expected[i], got)
+ self.assertEqual(expected, got)
def test_init(self):
'''test __init__'''
lines = [
- 'ref1\t1\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tI14L\t1\tI14L\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnon_coding1:0:0:I14L:.:foo_bar\tspam eggs',
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\t.\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tA\t10,30\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
- 'ref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tA,G\t20,10,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
+ 'ariba_ref1\tref1\t1\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tI14L\t1\tI14L\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:I14L:.:foo_bar\tspam eggs',
+ 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t17\tT\t17\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
+ 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A\t10,30\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
+ 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t1\tA14T\tSNP\t13\t13\tA\t84\t84\tT\t40\tT,A,G\t20,10,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
+ 'ariba_ref1\tref1\t0\t0\t531\t78\tcluster1\t120\t100\t98.33\tctg_name\t279\t24.4\t1\tSNP\tn\tA14T\t0\t.\t.\t13\t13\tA\t84\t84\tA\t100\tA,T\t90,10\tnon_coding1:0:0:A14T:id1:foo_bar\tspam eggs',
]
expected = [
{'coding': True, 'known': True, 'var_string': 'I14L', 'var_group': '.', 'het_percent': None},
- {'coding': False, 'known': True, 'var_string': 'A14T', 'var_group': 'id1', 'het_percent': None},
+ {'coding': False, 'known': True, 'var_string': 'A14T', 'var_group': 'id1', 'het_percent': 100.0},
{'coding': False, 'known': True, 'var_string': 'A14T', 'var_group': 'id1', 'het_percent': 25.0},
{'coding': False, 'known': True, 'var_string': 'A14T', 'var_group': 'id1', 'het_percent': 50.0},
+ {'coding': False, 'known': True, 'var_string': 'A14T', 'var_group': 'id1', 'het_percent': 10.0},
]
assert len(lines) == len(expected)
diff --git a/ariba/tests/summary_test.py b/ariba/tests/summary_test.py
index 280b301..e54a0fd 100644
--- a/ariba/tests/summary_test.py
+++ b/ariba/tests/summary_test.py
@@ -65,7 +65,7 @@ class TestSummary(unittest.TestCase):
def test_gather_unfiltered_output_data(self):
- '''test gather_output_rows_new'''
+ '''test gather_unfiltered_output_data'''
infiles = [
os.path.join(data_dir, 'summary_gather_unfiltered_output_data.in.1.tsv'),
os.path.join(data_dir, 'summary_gather_unfiltered_output_data.in.2.tsv')
@@ -111,12 +111,12 @@ class TestSummary(unittest.TestCase):
},
'presence_absence2': {
'summary': {
- 'assembled': 'no',
- 'known_var': 'NA',
+ 'assembled': 'partial',
+ 'known_var': 'no',
'match': 'no',
- 'novel_var': 'NA',
- 'pct_id': 'NA',
- 'ref_seq': 'NA'
+ 'novel_var': 'yes',
+ 'pct_id': '99.1',
+ 'ref_seq': 'presence_absence_ref2'
},
'groups': {},
'vars': {}
@@ -212,17 +212,18 @@ class TestSummary(unittest.TestCase):
}
}
+ self.maxDiff = None
s = summary.Summary('out', filenames=infiles)
s.samples = summary.Summary._load_input_files(infiles, 90)
s._gather_unfiltered_output_data()
self.assertEqual(expected_potential_cols, s.all_potential_columns)
self.assertEqual(expected_all, s.all_data)
- expected_potential_cols['noncoding1']['groups'] = {'id3', 'id1', 'id1.%'}
+ expected_potential_cols['noncoding1']['groups'] = {'id3', 'id1', 'id1.%', 'id3.%'}
expected_potential_cols['noncoding2']['groups'] = {'id2.%', 'id2'}
- expected_all[infiles[0]]['noncoding1']['groups'] = {'id1': 'yes'}
+ expected_all[infiles[0]]['noncoding1']['groups'] = {'id1': 'yes', 'id1.%': 100.0}
expected_all[infiles[0]]['noncoding2']['groups'] = {'id2': 'yes_multi_het', 'id2.%': 'NA'}
- expected_all[infiles[1]]['noncoding1']['groups'] = {'id1': 'het', 'id1.%': 80.0, 'id3': 'yes'}
+ expected_all[infiles[1]]['noncoding1']['groups'] = {'id1': 'het', 'id1.%': 80.0, 'id3': 'yes', 'id3.%': 100.0}
expected_all[infiles[1]]['noncoding2']['groups'] = {'id2': 'het', 'id2.%': 40.0}
s = summary.Summary('out', filenames=infiles, show_var_groups=True)
s.samples = summary.Summary._load_input_files(infiles, 90)
@@ -230,12 +231,12 @@ class TestSummary(unittest.TestCase):
self.assertEqual(expected_potential_cols, s.all_potential_columns)
self.assertEqual(expected_all, s.all_data)
- expected_potential_cols['noncoding1']['vars'] = {'A14T.%', 'A6G', 'A14T'}
- expected_potential_cols['noncoding2']['vars'] = {'A52T', 'A52T.%', 'A42T'}
+ expected_potential_cols['noncoding1']['vars'] = {'A14T.%', 'A6G', 'A6G.%', 'A14T'}
+ expected_potential_cols['noncoding2']['vars'] = {'A52T', 'A52T.%', 'A42T', 'A42T.%'}
- expected_all[infiles[0]]['noncoding1']['vars'] = {'A14T': 'yes'}
- expected_all[infiles[0]]['noncoding2']['vars'] = {'A42T': 'yes', 'A52T': 'het', 'A52T.%': 40.0}
- expected_all[infiles[1]]['noncoding1']['vars'] = {'A14T': 'het', 'A14T.%': 80.0, 'A6G': 'yes'}
+ expected_all[infiles[0]]['noncoding1']['vars'] = {'A14T': 'yes', 'A14T.%': 100.0}
+ expected_all[infiles[0]]['noncoding2']['vars'] = {'A42T': 'yes', 'A42T.%': 100.0, 'A52T': 'het', 'A52T.%': 40.0}
+ expected_all[infiles[1]]['noncoding1']['vars'] = {'A14T': 'het', 'A14T.%': 80.0, 'A6G': 'yes', 'A6G.%': 100.0}
expected_all[infiles[1]]['noncoding2']['vars'] = {'A52T': 'het', 'A52T.%': 40.0}
s = summary.Summary('out', filenames=infiles, show_var_groups=True, show_known_vars=True)
s.samples = summary.Summary._load_input_files(infiles, 90)
@@ -244,7 +245,9 @@ class TestSummary(unittest.TestCase):
self.assertEqual(expected_all, s.all_data)
expected_potential_cols['presence_absence1']['vars'] = {'A10V'}
+ expected_potential_cols['presence_absence2']['vars'] = {'V175L'}
expected_all[infiles[0]]['presence_absence1']['vars'] = {'A10V': 'yes'}
+ expected_all[infiles[0]]['presence_absence2']['vars'] = {'V175L': 'yes'}
expected_all[infiles[1]]['presence_absence1']['vars'] = {'A10V': 'yes'}
s = summary.Summary('out', filenames=infiles, show_var_groups=True, show_known_vars=True, show_novel_vars=True)
s.samples = summary.Summary._load_input_files(infiles, 90)
@@ -265,11 +268,11 @@ class TestSummary(unittest.TestCase):
s._gather_unfiltered_output_data()
got_phandango_header, got_csv_header, got_matrix = summary.Summary._to_matrix(infiles, s.all_data, s.all_potential_columns, s.cluster_columns)
- expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding1.id1:o1', 'noncoding1.id1.%:c2', 'noncoding1.id3:o1', 'noncoding1.A14T:o1', 'noncoding1.A14T.%:c2', 'noncoding1.A6G:o1', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1', 'noncodin [...]
- expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding1.id1', 'noncoding1.id1.%', 'noncoding1.id3', 'noncoding1.A14T', 'noncoding1.A14T.%', 'noncoding1.A6G', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var', 'noncoding2.id2', 'noncoding2.id2.%', 'noncoding2.A42T', 'noncoding2. [...]
+ expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding1.id1:o1', 'noncoding1.id1.%:c2', 'noncoding1.id3:o1', 'noncoding1.id3.%:c2', 'noncoding1.A14T:o1', 'noncoding1.A14T.%:c2', 'noncoding1.A6G:o1', 'noncoding1.A6G.%:c2', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.known [...]
+ expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding1.id1', 'noncoding1.id1.%', 'noncoding1.id3', 'noncoding1.id3.%', 'noncoding1.A14T', 'noncoding1.A14T.%', 'noncoding1.A6G', 'noncoding1.A6G.%', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var', 'noncoding2.id2', 'noncoding2 [...]
expected_matrix = [
- [infiles[0], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'yes', 'NA', 'no', 'yes', 'NA', 'no', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'yes_multi_het', 'NA', 'yes', 'het', 40.0, 'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes', 'yes'],
- [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'het', 80.0, 'yes', 'het', 80.0, 'yes', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'het', 40.0, 'no', 'het', 40.0, 'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes', 'yes']
+ [infiles[0], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'yes', 100.0, 'no', 'NA', 'yes', 100.0, 'no', 'NA', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'yes_multi_het', 'NA', 'yes', 100.0, 'het', 40.0, 'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes', 'yes'],
+ [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'het', 80.0, 'yes', 100.0, 'het', 80.0, 'yes', 100.0, 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'het', 40.0, 'no', 'NA', 'het', 40.0, 'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes', 'yes']
]
self.assertEqual(expected_phandango_header, got_phandango_header)
@@ -289,11 +292,11 @@ class TestSummary(unittest.TestCase):
s._gather_unfiltered_output_data()
got_phandango_header, got_csv_header, got_matrix = summary.Summary._to_matrix(infiles, s.all_data, s.all_potential_columns, s.cluster_columns)
- expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding1.id1:o1', 'noncoding1.id1.%:c2', 'noncoding1.id3:o1', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1', 'noncoding2.id2:o1', 'noncoding2.id2.%:c2', 'presence_absence1.assembled:o1' [...]
- expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding1.id1', 'noncoding1.id1.%', 'noncoding1.id3', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var', 'noncoding2.id2', 'noncoding2.id2.%', 'presence_absence1.assembled', 'presence_absence1.match', 'presence_absence1.ref_seq', 'p [...]
+ expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding1.id1:o1', 'noncoding1.id1.%:c2', 'noncoding1.id3:o1', 'noncoding1.id3.%:c2', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1', 'noncoding2.id2:o1', 'noncoding2.id2.%:c2', 'presence [...]
+ expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding1.id1', 'noncoding1.id1.%', 'noncoding1.id3', 'noncoding1.id3.%', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var', 'noncoding2.id2', 'noncoding2.id2.%', 'presence_absence1.assembled', 'presence_absence1.match', 'presence_a [...]
expected_matrix = [
- [infiles[0], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'yes', 'NA', 'no', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'yes_multi_het', 'NA', 'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes'],
- [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'het', 80.0, 'yes', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'het', 40.0, 'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes']
+ [infiles[0], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'yes', 100.0, 'no', 'NA', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'yes_multi_het', 'NA', 'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes'],
+ [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'het', 80.0, 'yes', 100.0, 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'het', 40.0, 'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes']
]
self.assertEqual(expected_phandango_header, got_phandango_header)
@@ -313,11 +316,11 @@ class TestSummary(unittest.TestCase):
s._gather_unfiltered_output_data()
got_phandango_header, got_csv_header, got_matrix = summary.Summary._to_matrix(infiles, s.all_data, s.all_potential_columns, s.cluster_columns)
- expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding1.A14T:o1', 'noncoding1.A14T.%:c2', 'noncoding1.A6G:o1', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1', 'noncoding2.A42T:o1', 'noncoding2.A52T:o1', 'noncoding2.A52T.%:c2', 'prese [...]
- expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding1.A14T', 'noncoding1.A14T.%', 'noncoding1.A6G', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var', 'noncoding2.A42T', 'noncoding2.A52T', 'noncoding2.A52T.%', 'presence_absence1.assembled', 'presence_absence1.match', 'presenc [...]
+ expected_phandango_header = ['name', 'noncoding1.assembled:o1', 'noncoding1.match:o1', 'noncoding1.ref_seq:o2', 'noncoding1.pct_id:c1', 'noncoding1.known_var:o1', 'noncoding1.novel_var:o1', 'noncoding1.A14T:o1', 'noncoding1.A14T.%:c2', 'noncoding1.A6G:o1', 'noncoding1.A6G.%:c2', 'noncoding2.assembled:o1', 'noncoding2.match:o1', 'noncoding2.ref_seq:o3', 'noncoding2.pct_id:c1', 'noncoding2.known_var:o1', 'noncoding2.novel_var:o1', 'noncoding2.A42T:o1', 'noncoding2.A42T.%:c2', 'nonc [...]
+ expected_csv_header = ['name', 'noncoding1.assembled', 'noncoding1.match', 'noncoding1.ref_seq', 'noncoding1.pct_id', 'noncoding1.known_var', 'noncoding1.novel_var', 'noncoding1.A14T', 'noncoding1.A14T.%', 'noncoding1.A6G', 'noncoding1.A6G.%', 'noncoding2.assembled', 'noncoding2.match', 'noncoding2.ref_seq', 'noncoding2.pct_id', 'noncoding2.known_var', 'noncoding2.novel_var', 'noncoding2.A42T', 'noncoding2.A42T.%', 'noncoding2.A52T', 'noncoding2.A52T.%', 'presence_absence1.assemb [...]
expected_matrix = [
- [infiles[0], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'yes', 'NA', 'no', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'yes', 'het', 40.0, 'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes', 'yes'],
- [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'het', 80.0, 'yes', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'no', 'het', 40.0, 'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes', 'yes']
+ [infiles[0], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'yes', 100.0, 'no', 'NA', 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'yes', 100.0, 'het', 40.0, 'yes', 'yes', 'presence_absence_ref1', '98.96', 'no', 'yes', 'yes'],
+ [infiles[1], 'yes', 'yes', 'noncoding_ref1', '98.33', 'yes', 'no', 'het', 80.0, 'yes', 100.0, 'yes', 'yes', 'noncoding_ref2', '98.33', 'yes', 'no', 'no', 'NA', 'het', 40.0, 'yes', 'yes', 'presence_absence1', '98.96', 'no', 'yes', 'yes']
]
self.assertEqual(expected_phandango_header, got_phandango_header)
@@ -559,6 +562,7 @@ class TestSummary(unittest.TestCase):
expected = [line.rstrip().split(',', maxsplit=1)[1] for line in f]
with open(tmp_out + '.csv') as f:
got = [line.rstrip().split(',', maxsplit=1)[1] for line in f]
+
self.assertEqual(expected, got)
os.unlink(tmp_out + '.csv')
os.unlink(tmp_out + '.phandango.csv')
diff --git a/scripts/ariba b/scripts/ariba
index a9287ba..381c69c 100755
--- a/scripts/ariba
+++ b/scripts/ariba
@@ -49,6 +49,7 @@ subparser_getref = subparsers.add_parser(
usage='ariba getref [options] <db> <outprefix>',
description='Download reference data from one of a few supported public resources',
)
+subparser_getref.add_argument('--debug', action='store_true', help='Do not delete temporary downloaded files')
subparser_getref.add_argument('--version', help='Version of reference data to download. If not used, gets the latest version. Only applies to card')
subparser_getref.add_argument('db', help='Database to download. Must be one of: ' + ' '.join(allowed_dbs), choices=allowed_dbs, metavar="DB name")
subparser_getref.add_argument('outprefix', help='Prefix of output filenames')
@@ -75,12 +76,13 @@ cdhit_group.add_argument('--cdhit_clusters', help='File specifying how the seque
cdhit_group.add_argument('--cdhit_min_id', type=float, help='Sequence identity threshold (cd-hit option -c) [%(default)s]', default=0.9, metavar='FLOAT')
cdhit_group.add_argument('--cdhit_min_length', type=float, help='length difference cutoff (cd-hit option -s) [%(default)s]', default=0.9, metavar='FLOAT')
-other_group = subparser_prepareref.add_argument_group('other options')
-other_group.add_argument('--min_gene_length', type=int, help='Minimum allowed length in nucleotides of reference genes [%(default)s]', metavar='INT', default=6)
-other_group.add_argument('--max_gene_length', type=int, help='Maximum allowed length in nucleotides of reference genes [%(default)s]', metavar='INT', default=10000)
-other_group.add_argument('--genetic_code', type=int, help='Number of genetic code to use. Currently supported 1,4,11 [%(default)s]', choices=[1,4,11], default=11, metavar='INT')
-other_group.add_argument('--threads', type=int, help='Number of threads (currently only applies to cdhit) [%(default)s]', default=1, metavar='INT')
-other_group.add_argument('--verbose', action='store_true', help='Be verbose')
+other_prep_group = subparser_prepareref.add_argument_group('other options')
+other_prep_group.add_argument('--min_gene_length', type=int, help='Minimum allowed length in nucleotides of reference genes [%(default)s]', metavar='INT', default=6)
+other_prep_group.add_argument('--max_gene_length', type=int, help='Maximum allowed length in nucleotides of reference genes [%(default)s]', metavar='INT', default=10000)
+other_prep_group.add_argument('--genetic_code', type=int, help='Number of genetic code to use. Currently supported 1,4,11 [%(default)s]', choices=[1,4,11], default=11, metavar='INT')
+other_prep_group.add_argument('--force', action='store_true', help='Overwrite output directory, if it already exists')
+other_prep_group.add_argument('--threads', type=int, help='Number of threads (currently only applies to cdhit) [%(default)s]', default=1, metavar='INT')
+other_prep_group.add_argument('--verbose', action='store_true', help='Be verbose')
subparser_prepareref.add_argument('outdir', help='Output directory (must not already exist)')
subparser_prepareref.set_defaults(func=ariba.tasks.prepareref.run)
@@ -101,20 +103,20 @@ subparser_refquery.set_defaults(func=ariba.tasks.refquery.run)
#----------------------------- reportfilter -------------------------------
-subparser_reportfilter = subparsers.add_parser(
- 'reportfilter',
- help='Filters a report tsv file',
- description='Filters an ARIBA report tsv file made by "ariba run"',
- usage='ariba reportfilter [options] <infile> <outfile>'
-)
-subparser_reportfilter.add_argument('--exclude_flags', help='Comma-separated list of flags to exclude. [%(default)s]', default='assembly_fail,ref_seq_choose_fail')
-subparser_reportfilter.add_argument('--min_pc_id', type=float, help='Minimum percent identity of nucmer match between contig and reference [%(default)s]', default=90.0, metavar='FLOAT')
-subparser_reportfilter.add_argument('--min_ref_base_asm', type=int, help='Minimum number of reference bases matching assembly [%(default)s]', default=1, metavar='INT')
-subparser_reportfilter.add_argument('--keep_syn', action='store_true', help='Keep synonymous variants (by default they are removed')
-subparser_reportfilter.add_argument('--discard_without_known_var', action='store_true', help='Applies to variant only genes. Filter out where there is a known variant, but the assembly has the wild type. By default these rows are kept.')
-subparser_reportfilter.add_argument('infile', help='Name of input tsv file')
-subparser_reportfilter.add_argument('outfile', help='Name of output tsv file')
-subparser_reportfilter.set_defaults(func=ariba.tasks.reportfilter.run)
+#subparser_reportfilter = subparsers.add_parser(
+# 'reportfilter',
+# help='Filters a report tsv file',
+# description='Filters an ARIBA report tsv file made by "ariba run"',
+# usage='ariba reportfilter [options] <infile> <outfile>'
+#)
+#subparser_reportfilter.add_argument('--exclude_flags', help='Comma-separated list of flags to exclude. [%(default)s]', default='assembly_fail,ref_seq_choose_fail')
+#subparser_reportfilter.add_argument('--min_pc_id', type=float, help='Minimum percent identity of nucmer match between contig and reference [%(default)s]', default=90.0, metavar='FLOAT')
+#subparser_reportfilter.add_argument('--min_ref_base_asm', type=int, help='Minimum number of reference bases matching assembly [%(default)s]', default=1, metavar='INT')
+#subparser_reportfilter.add_argument('--keep_syn', action='store_true', help='Keep synonymous variants (by default they are removed')
+#subparser_reportfilter.add_argument('--discard_without_known_var', action='store_true', help='Applies to variant only genes. Filter out where there is a known variant, but the assembly has the wild type. By default these rows are kept.')
+#subparser_reportfilter.add_argument('infile', help='Name of input tsv file')
+#subparser_reportfilter.add_argument('outfile', help='Name of output tsv file')
+#subparser_reportfilter.set_defaults(func=ariba.tasks.reportfilter.run)
#----------------------------- run -------------------------------
@@ -139,15 +141,16 @@ assembly_group = subparser_run.add_argument_group('Assembly options')
assembly_group.add_argument('--assembly_cov', type=int, help='Target read coverage when sampling reads for assembly [%(default)s]', default=50, metavar='INT')
assembly_group.add_argument('--min_scaff_depth', type=int, help='Minimum number of read pairs needed as evidence for scaffold link between two contigs [%(default)s]', default=10, metavar='INT')
-other_group = subparser_run.add_argument_group('Other options')
-#other_group.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
-other_group.add_argument('--threads', type=int, help=argparse.SUPPRESS, default=1, metavar='INT')
-other_group.add_argument('--assembled_threshold', type=float, help='If proportion of gene assembled (regardless of into how many contigs) is at least this value then the flag gene_assembled is set [%(default)s]', default=0.95, metavar='FLOAT (between 0 and 1)')
-other_group.add_argument('--gene_nt_extend', type=int, help='Max number of nucleotides to extend ends of gene matches to look for start/stop codons [%(default)s]', default=30, metavar='INT')
-other_group.add_argument('--unique_threshold', type=float, help='If proportion of bases in gene assembled more than once is <= this value, then the flag unique_contig is set [%(default)s]', default=0.03, metavar='FLOAT (between 0 and 1)')
-other_group.add_argument('--noclean', action='store_true', help='Do not clean up intermediate files')
-other_group.add_argument('--tmp_dir', help='Existing directory in which to create a temporary directory used for local assemblies')
-other_group.add_argument('--verbose', action='store_true', help='Be verbose')
+other_run_group = subparser_run.add_argument_group('Other options')
+#other_run_group.add_argument('--threads', type=int, help='Number of threads [%(default)s]', default=1, metavar='INT')
+other_run_group.add_argument('--threads', type=int, help=argparse.SUPPRESS, default=1, metavar='INT')
+other_run_group.add_argument('--assembled_threshold', type=float, help='If proportion of gene assembled (regardless of into how many contigs) is at least this value then the flag gene_assembled is set [%(default)s]', default=0.95, metavar='FLOAT (between 0 and 1)')
+other_run_group.add_argument('--gene_nt_extend', type=int, help='Max number of nucleotides to extend ends of gene matches to look for start/stop codons [%(default)s]', default=30, metavar='INT')
+other_run_group.add_argument('--unique_threshold', type=float, help='If proportion of bases in gene assembled more than once is <= this value, then the flag unique_contig is set [%(default)s]', default=0.03, metavar='FLOAT (between 0 and 1)')
+other_run_group.add_argument('--force', action='store_true', help='Overwrite output directory, if it already exists')
+other_run_group.add_argument('--noclean', action='store_true', help='Do not clean up intermediate files')
+other_run_group.add_argument('--tmp_dir', help='Existing directory in which to create a temporary directory used for local assemblies')
+other_run_group.add_argument('--verbose', action='store_true', help='Be verbose')
subparser_run.set_defaults(func=ariba.tasks.run.run)
diff --git a/setup.py b/setup.py
index 416b0f4..dc7533f 100644
--- a/setup.py
+++ b/setup.py
@@ -55,7 +55,7 @@ vcfcall_mod = Extension(
setup(
ext_modules=[minimap_mod, fermilite_mod, vcfcall_mod],
name='ariba',
- version='2.2.5',
+ version='2.3.0',
description='ARIBA: Antibiotic Resistance Identification By Assembly',
packages = find_packages(),
package_data={'ariba': ['test_run_data/*']},
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/ariba.git
More information about the debian-med-commit
mailing list