[med-svn] [iva] 03/05: Imported Upstream version 1.0.4
Sascha Steinbiss
sascha at steinbiss.name
Thu Apr 28 22:04:57 UTC 2016
This is an automated email from the git hooks/post-receive script.
sascha-guest pushed a commit to branch master
in repository iva.
commit 0bb6f953ae62ed493a692729ce295d4a9b3f2c64
Author: Sascha Steinbiss <sascha at steinbiss.name>
Date: Thu Apr 28 21:37:06 2016 +0000
Imported Upstream version 1.0.4
---
iva/__init__.py | 1 +
iva/assembly.py | 5 +-
iva/common.py | 2 +-
iva/external_progs.py | 8 +
iva/gage/GetFastaStats$ContigAt.class | Bin 489 -> 0 bytes
iva/gage/GetFastaStats.class | Bin 9674 -> 0 bytes
iva/gage/SizeFasta.class | Bin 3241 -> 0 bytes
iva/gage/SplitFastaByLetter.class | Bin 2497 -> 0 bytes
iva/gage/Utils$Pair.class | Bin 621 -> 0 bytes
iva/gage/Utils$ToProtein.class | Bin 4565 -> 0 bytes
iva/gage/Utils$Translate.class | Bin 1226 -> 0 bytes
iva/gage/Utils.class | Bin 5321 -> 0 bytes
iva/gage/getScaffoldStats$1.class | Bin 199 -> 0 bytes
iva/gage/getScaffoldStats$Scaffold.class | Bin 1232 -> 0 bytes
iva/gage/getScaffoldStats.class | Bin 12187 -> 0 bytes
iva/kcount.py | 16 +-
iva/kraken.py | 21 +--
iva/seed.py | 7 +-
iva/seed_processor.py | 7 +-
iva/test_data_runner.py | 50 ++++++
iva/test_run_data/hiv_pcr_primers.fa | 36 +++++
iva/test_run_data/iva_contigs_no_trimmomatic.fasta | 167 +++++++++++++++++++++
.../iva_contigs_with_trimmomatic.fasta | 152 +++++++++++++++++++
iva/test_run_data/reads_1.fq.gz | Bin 0 -> 3813239 bytes
iva/test_run_data/reads_2.fq.gz | Bin 0 -> 4661713 bytes
iva/test_run_data/reference.fasta | 152 +++++++++++++++++++
iva/tests/kcount_test.py | 8 +
scripts/iva | 19 +++
setup.py | 8 +-
29 files changed, 629 insertions(+), 30 deletions(-)
diff --git a/iva/__init__.py b/iva/__init__.py
index 825a828..47ee547 100644
--- a/iva/__init__.py
+++ b/iva/__init__.py
@@ -17,5 +17,6 @@ __all__ = [
'read_trim',
'seed',
'seed_processor',
+ 'test_data_runner',
]
from iva import *
diff --git a/iva/assembly.py b/iva/assembly.py
index 3a3a9ca..305c1d4 100644
--- a/iva/assembly.py
+++ b/iva/assembly.py
@@ -6,12 +6,13 @@ from iva import contig, mapping, seed, mummer, graph, edge, common
import pyfastaq
class Assembly:
- def __init__(self, contigs_file=None, map_index_k=15, map_index_s=3, threads=1, max_insert=800, map_minid=0.5, min_clip=3, ext_min_cov=5, ext_min_ratio=2, ext_bases=100, verbose=0, seed_min_cov=5, seed_min_ratio=10, seed_min_kmer_count=200, seed_max_kmer_count=1000000000, seed_start_length=None, seed_stop_length=500, seed_overlap_length=None, make_new_seeds=False, contig_iter_trim=10, seed_ext_max_bases=50, max_contigs=50, clean=True, strand_bias=0):
+ def __init__(self, contigs_file=None, map_index_k=15, map_index_s=3, threads=1, kmc_threads=1, max_insert=800, map_minid=0.5, min_clip=3, ext_min_cov=5, ext_min_ratio=2, ext_bases=100, verbose=0, seed_min_cov=5, seed_min_ratio=10, seed_min_kmer_count=200, seed_max_kmer_count=1000000000, seed_start_length=None, seed_stop_length=500, seed_overlap_length=None, make_new_seeds=False, contig_iter_trim=10, seed_ext_max_bases=50, max_contigs=50, clean=True, strand_bias=0):
self.contigs = {}
self.contig_lengths = {}
self.map_index_k = map_index_k
self.map_index_s = map_index_s
self.threads = threads
+ self.kmc_threads = kmc_threads
self.max_insert = max_insert
self.map_minid = map_minid
self.min_clip = min_clip
@@ -583,7 +584,7 @@ class Assembly:
made_seed = False
for i in range(max_attempts):
- s = seed.Seed(reads1=seed_reads1, reads2=seed_reads2, extend_length=self.seed_ext_max_bases, seed_length=self.seed_start_length, seed_min_count=self.seed_min_kmer_count, seed_max_count=self.seed_max_kmer_count, ext_min_cov=self.seed_min_cov, ext_min_ratio=self.seed_min_ratio, verbose=self.verbose, threads=self.threads, sequences_to_ignore=self.used_seeds, contigs_to_check=self.contigs)
+ s = seed.Seed(reads1=seed_reads1, reads2=seed_reads2, extend_length=self.seed_ext_max_bases, seed_length=self.seed_start_length, seed_min_count=self.seed_min_kmer_count, seed_max_count=self.seed_max_kmer_count, ext_min_cov=self.seed_min_cov, ext_min_ratio=self.seed_min_ratio, verbose=self.verbose, kmc_threads=self.kmc_threads, map_threads=self.threads, sequences_to_ignore=self.used_seeds, contigs_to_check=self.contigs)
if s.seq is None or len(s.seq) == 0:
break
diff --git a/iva/common.py b/iva/common.py
index 5a8c804..271d35f 100644
--- a/iva/common.py
+++ b/iva/common.py
@@ -2,7 +2,7 @@ import argparse
import os
import sys
import subprocess
-version = '1.0.0'
+version = '1.0.4'
class abspathAction(argparse.Action):
def __call__(self, parser, namespace, value, option_string):
diff --git a/iva/external_progs.py b/iva/external_progs.py
index a727111..8b42054 100644
--- a/iva/external_progs.py
+++ b/iva/external_progs.py
@@ -2,6 +2,7 @@ import shutil
import subprocess
import re
import sys
+from distutils.version import LooseVersion
import pyfastaq
from iva import common
@@ -25,6 +26,11 @@ prog_to_version_cmd = {
'samtools': ('samtools', re.compile('^Version: (.*)$')),
}
+
+minimum_versions = {
+ 'samtools': '0.1.19'
+}
+
assembly_progs = [
'kmc',
@@ -79,6 +85,8 @@ def get_all_versions(progs, must_be_in_path=True):
info = []
for prog in sorted(progs):
version = get_version(prog, must_be_in_path=must_be_in_path)
+ if prog in minimum_versions and LooseVersion(version) < LooseVersion(minimum_versions[prog]):
+ raise Error('Found version ' + version + ' of ' + prog + ' but must be at least ' + minimum_versions[prog] + '. Cannot continue')
info.append(' '.join(['Using', prog, 'version', version]))
return info
diff --git a/iva/gage/GetFastaStats$ContigAt.class b/iva/gage/GetFastaStats$ContigAt.class
deleted file mode 100644
index 7b71d97..0000000
Binary files a/iva/gage/GetFastaStats$ContigAt.class and /dev/null differ
diff --git a/iva/gage/GetFastaStats.class b/iva/gage/GetFastaStats.class
deleted file mode 100644
index 11e678e..0000000
Binary files a/iva/gage/GetFastaStats.class and /dev/null differ
diff --git a/iva/gage/SizeFasta.class b/iva/gage/SizeFasta.class
deleted file mode 100644
index 4724b73..0000000
Binary files a/iva/gage/SizeFasta.class and /dev/null differ
diff --git a/iva/gage/SplitFastaByLetter.class b/iva/gage/SplitFastaByLetter.class
deleted file mode 100644
index 444915d..0000000
Binary files a/iva/gage/SplitFastaByLetter.class and /dev/null differ
diff --git a/iva/gage/Utils$Pair.class b/iva/gage/Utils$Pair.class
deleted file mode 100644
index c404bc5..0000000
Binary files a/iva/gage/Utils$Pair.class and /dev/null differ
diff --git a/iva/gage/Utils$ToProtein.class b/iva/gage/Utils$ToProtein.class
deleted file mode 100644
index c833220..0000000
Binary files a/iva/gage/Utils$ToProtein.class and /dev/null differ
diff --git a/iva/gage/Utils$Translate.class b/iva/gage/Utils$Translate.class
deleted file mode 100644
index 6842928..0000000
Binary files a/iva/gage/Utils$Translate.class and /dev/null differ
diff --git a/iva/gage/Utils.class b/iva/gage/Utils.class
deleted file mode 100644
index 66949ba..0000000
Binary files a/iva/gage/Utils.class and /dev/null differ
diff --git a/iva/gage/getScaffoldStats$1.class b/iva/gage/getScaffoldStats$1.class
deleted file mode 100644
index d6b14b7..0000000
Binary files a/iva/gage/getScaffoldStats$1.class and /dev/null differ
diff --git a/iva/gage/getScaffoldStats$Scaffold.class b/iva/gage/getScaffoldStats$Scaffold.class
deleted file mode 100644
index c1c69fb..0000000
Binary files a/iva/gage/getScaffoldStats$Scaffold.class and /dev/null differ
diff --git a/iva/gage/getScaffoldStats.class b/iva/gage/getScaffoldStats.class
deleted file mode 100644
index de57f0f..0000000
Binary files a/iva/gage/getScaffoldStats.class and /dev/null differ
diff --git a/iva/kcount.py b/iva/kcount.py
index 1b8a957..1146c58 100644
--- a/iva/kcount.py
+++ b/iva/kcount.py
@@ -52,14 +52,14 @@ def _median(d):
return key
-def _run_kmc_with_script(script, reads, outfile, kmer, min_count, max_count, m_option, verbose, allow_fail):
+def _run_kmc_with_script(script, reads, outfile, kmer, min_count, max_count, m_option, verbose, allow_fail, threads=1):
f = pyfastaq.utils.open_file_write(script)
print('set -e', file=f)
kmc_command = ''.join([
'kmc -fa',
' -m', str(m_option),
' -k', str(kmer),
- ' -sf', '1',
+ ' -sf', str(threads),
' -ci', str(min_count),
' -cs', str(max_count),
' -cx', str(max_count),
@@ -80,7 +80,7 @@ def _run_kmc_with_script(script, reads, outfile, kmer, min_count, max_count, m_o
return common.syscall('bash ' + script, allow_fail=allow_fail)
-def _run_kmc(reads, outprefix, kmer, min_count, max_count, verbose=0):
+def _run_kmc(reads, outprefix, kmer, min_count, max_count, verbose=0, threads=1):
'''Runs the kmer counting program kmc on a FASTA file. Returns filename made by kmc of the counts of kmers'''
reads = os.path.abspath(reads)
tmpdir = tempfile.mkdtemp(prefix='tmp.run_kmc.', dir=os.getcwd())
@@ -92,11 +92,11 @@ def _run_kmc(reads, outprefix, kmer, min_count, max_count, verbose=0):
# The range is 4-32 (GB).
# Try 4 and 32 (the default), then give up. This seems to make a difference, regardless of
# RAM available on the machine.
- ran_ok = _run_kmc_with_script('run_kmc.sh', reads, kmer_counts_file, kmer, min_count, max_count, 32, verbose, True)
+ ran_ok = _run_kmc_with_script('run_kmc.sh', reads, kmer_counts_file, kmer, min_count, max_count, 32, verbose, True, threads=threads)
if not ran_ok:
if verbose:
print('First try of running kmc failed. Trying again with -m4 instead of -m32...', flush=True)
- ran_ok = _run_kmc_with_script('run_kmc.sh', reads, kmer_counts_file, kmer, min_count, max_count, 4, verbose, False)
+ ran_ok = _run_kmc_with_script('run_kmc.sh', reads, kmer_counts_file, kmer, min_count, max_count, 4, verbose, False, threads=threads)
os.chdir(original_dir)
shutil.rmtree(tmpdir)
@@ -204,7 +204,7 @@ def _counts_file_to_fasta(infile, outfile):
pyfastaq.utils.close(fout)
-def get_most_common_kmers(reads1, reads2, kmer_length=None, head=100000, min_count=10, max_count=100000000, most_common=100, method='kmc', verbose=0, ignore_seqs=None, contigs_to_check=None, threads=1):
+def get_most_common_kmers(reads1, reads2, kmer_length=None, head=100000, min_count=10, max_count=100000000, most_common=100, method='kmc', verbose=0, ignore_seqs=None, contigs_to_check=None, kmc_threads=1, map_threads=1):
'''Gets the most common kmers from a pair of interleaved read FASTA or FASTQ files. Takes the first N sequences (determined by head). Returns a dict of kmer=>frequency. If kmer length is not given, use min(0.8 * median read length, 95)'''
tmpdir = tempfile.mkdtemp(prefix='tmp.common_kmers.', dir=os.getcwd())
counts = {}
@@ -217,8 +217,8 @@ def get_most_common_kmers(reads1, reads2, kmer_length=None, head=100000, min_cou
kmer_length = min(int(0.8 * _median(read_lengths)), 95)
if method == 'kmc':
- counts_file = _run_kmc(reads, os.path.join(tmpdir, 'out'), kmer_length, min_count, max_count, verbose=verbose)
- counts = _kmc_to_kmer_counts(counts_file, most_common, kmers_to_ignore=ignore_seqs, contigs_to_check=contigs_to_check, verbose=verbose, threads=threads)
+ counts_file = _run_kmc(reads, os.path.join(tmpdir, 'out'), kmer_length, min_count, max_count, verbose=verbose, threads=kmc_threads)
+ counts = _kmc_to_kmer_counts(counts_file, most_common, kmers_to_ignore=ignore_seqs, contigs_to_check=contigs_to_check, verbose=verbose, threads=map_threads)
else:
raise Error('Method "' + method + '" not supported in kcount.get_most_common_kmers(). Cannot continue.')
diff --git a/iva/kraken.py b/iva/kraken.py
index 5b95b8b..fc5bbda 100644
--- a/iva/kraken.py
+++ b/iva/kraken.py
@@ -1,3 +1,4 @@
+import stat
import inspect
import sys
import os
@@ -25,7 +26,7 @@ class Database:
self.minimizer_len = minimizer_len
self.max_db_size = max_db_size
self.current_taxon_id = 2000000000
- self.current_gi = 4000000000
+ self.current_gi = 4000000000
self.preload = preload
self.verbose = verbose
self.taxon_to_parent = {}
@@ -88,11 +89,11 @@ class Database:
'genbank_ids': genbank_ids,
'new_gis': new_gis,
}
-
+
self.current_taxon_id += 1
pyfastaq.utils.close(f)
-
+
def _download_from_genbank(self, outfile, filetype, gi, max_tries=5, delay=3):
assert filetype in ['gb', 'fasta']
file_ok = False
@@ -137,7 +138,7 @@ class Database:
gi = None
for line in f:
if line.startswith(' /db_xref="taxon:'):
- taxon_id = line.rstrip().split(':')[-1].rstrip('"')
+ taxon_id = line.rstrip().split(':')[-1].rstrip('"')
elif line.startswith('VERSION'):
gi = line.rstrip().split()[-1].split(':')[-1]
if None not in [taxon_id, gi]:
@@ -161,7 +162,7 @@ class Database:
iva.common.syscall('grep -v CONTIG ' + infile + ' > tmp.gbk; mv tmp.gbk ' + infile)
iva.common.syscall(genbank2embl + ' ' + infile + ' ' + outfile, verbose=self.verbose)
shutil.rmtree(tmpdir)
-
+
def _append_to_file(self, filename, line):
try:
@@ -199,7 +200,7 @@ class Database:
]) + '\t|'
self._append_to_file(self.kraken_nodes_dmp, line)
self.added_to_kraken.add(new_taxon)
- self._append_to_file(self.kraken_gi_taxid_nucl_dmp, str(new_gi) + '\t' + str(new_taxon))
+ self._append_to_file(self.kraken_gi_taxid_nucl_dmp, str(new_gi) + '\t' + str(new_taxon))
iva.common.syscall('kraken-build --add-to-library ' + fa_file + ' --db ' + self.kraken_db, verbose=self.verbose)
@@ -254,7 +255,7 @@ class Database:
print('unlink', os.path.exists(fa_file), fa_file)
os.unlink(gb_file)
os.unlink(fa_file)
-
+
def _build_kraken_virus_db(self):
if os.path.exists(self.done_files['clean']):
@@ -270,7 +271,7 @@ class Database:
iva.common.syscall('kraken-build --download-taxonomy --db ' + self.kraken_db, verbose=self.verbose)
if not self.skip_virus_download:
iva.common.syscall('kraken-build --download-library viruses --db ' + self.kraken_db, verbose=self.verbose)
-
+
if self.extra_refs_file is not None:
self._load_extra_ref_info()
self._download_extra_refs()
@@ -299,7 +300,7 @@ class Database:
if os.path.exists(self.extra_refs_dir):
shutil.rmtree(self.extra_refs_dir)
iva.common.syscall('touch ' + self.done_files['clean'], verbose=self.verbose)
-
+
def _get_genbank_virus_files(self):
if os.path.exists(self.done_files['make_embl']):
@@ -336,7 +337,7 @@ class Database:
print()
new_dir = re.sub('_uid[0-9]+$', '', directory).strip('_')
if new_dir != directory:
- os.rename(directory, new_dir)
+ os.rename(directory, new_dir)
final_dir = os.path.join(self.embl_root, os.path.basename(new_dir))
if os.path.exists(final_dir):
diff --git a/iva/seed.py b/iva/seed.py
index 883b689..993e31f 100644
--- a/iva/seed.py
+++ b/iva/seed.py
@@ -8,13 +8,14 @@ from iva import kcount, kmers, mapping
class Error (Exception): pass
class Seed:
- def __init__(self, extend_length=50, overlap_length=None, reads1=None, reads2=None, seq=None, ext_min_cov=5, ext_min_ratio=2, verbose=0, seed_length=None, seed_min_count=10, seed_max_count=100000000, threads=1, sequences_to_ignore=None, contigs_to_check=None):
+ def __init__(self, extend_length=50, overlap_length=None, reads1=None, reads2=None, seq=None, ext_min_cov=5, ext_min_ratio=2, verbose=0, seed_length=None, seed_min_count=10, seed_max_count=100000000, kmc_threads=1, map_threads=1, sequences_to_ignore=None, contigs_to_check=None):
if contigs_to_check is None:
contigs_to_check = {}
if sequences_to_ignore is None:
sequences_to_ignore = set()
self.verbose = verbose
- self.threads = threads
+ self.kmc_threads = kmc_threads
+ self.map_threads = map_threads
self.extend_length = extend_length
self.ext_min_cov = ext_min_cov
self.ext_min_ratio = ext_min_ratio
@@ -23,7 +24,7 @@ class Seed:
if seq is None:
if reads1 is None:
raise Error('Cannot construct Seed object. Need reads when no seq has been given')
- kmer_counts = kcount.get_most_common_kmers(reads1, reads2, most_common=1, min_count=seed_min_count, max_count=seed_max_count, kmer_length=seed_length, verbose=self.verbose, ignore_seqs=sequences_to_ignore, contigs_to_check=contigs_to_check)
+ kmer_counts = kcount.get_most_common_kmers(reads1, reads2, most_common=1, min_count=seed_min_count, max_count=seed_max_count, kmer_length=seed_length, verbose=self.verbose, ignore_seqs=sequences_to_ignore, contigs_to_check=contigs_to_check, kmc_threads=self.kmc_threads, map_threads=self.map_threads)
if len(kmer_counts) == 1:
self.seq = list(kmer_counts.keys())[0]
if self.verbose:
diff --git a/iva/seed_processor.py b/iva/seed_processor.py
index 316a785..d62d6b3 100644
--- a/iva/seed_processor.py
+++ b/iva/seed_processor.py
@@ -9,7 +9,7 @@ import pyfastaq
class Error (Exception): pass
class SeedProcessor:
- def __init__(self, seeds_fasta, reads1, reads2, outfile, index_k=15, index_s=3, threads=1, max_insert=500, minid=0.9, seed_stop_length=500, extend_length=50, overlap_length=None, ext_min_cov=5, ext_min_ratio=2, verbose=0, seed_length=None, seed_min_count=10, seed_max_count=100000000):
+ def __init__(self, seeds_fasta, reads1, reads2, outfile, index_k=15, index_s=3, threads=1, max_insert=500, minid=0.9, seed_stop_length=500, extend_length=50, overlap_length=None, ext_min_cov=5, ext_min_ratio=2, verbose=0, seed_length=None, seed_min_count=10, seed_max_count=100000000, kmc_threads=1):
self.seeds_fasta = seeds_fasta
self.reads1 = reads1
self.reads2 = reads2
@@ -17,6 +17,7 @@ class SeedProcessor:
self.index_k = index_k
self.index_s = index_s
self.threads = threads
+ self.kmc_threads = kmc_threads
self.max_insert = max_insert
self.minid = minid
self.seed_stop_length = seed_stop_length
@@ -61,7 +62,9 @@ class SeedProcessor:
verbose = self.verbose,
seed_length = self.seed_length,
seed_min_count = self.seed_min_count,
- seed_max_count = self.seed_max_count
+ seed_max_count = self.seed_max_count,
+ kmc_threads = self.kmc_threads,
+ map_threads = self.threads
)
if len(new_seed) == 0:
print('Warning: could not get most common kmer for', seed_name)
diff --git a/iva/test_data_runner.py b/iva/test_data_runner.py
new file mode 100644
index 0000000..6585899
--- /dev/null
+++ b/iva/test_data_runner.py
@@ -0,0 +1,50 @@
+import os
+import iva
+
+class Error (Exception): pass
+
+class Tester:
+ def __init__(self, outdir, iva_script, trimmo_jar=None, threads=1):
+ self.outdir = os.path.join(outdir)
+ if os.path.exists(self.outdir):
+ raise Error('Output directory alread exists. Cannot continue')
+
+ self.iva_script = iva_script
+ self.trimmo_jar = trimmo_jar
+ self.threads = threads
+
+
+ def _copy_input_files(self):
+ extractor = iva.egg_extract.Extractor(os.path.abspath(os.path.join(os.path.dirname(iva.__file__), os.pardir)))
+ test_files = os.path.join('iva', 'test_run_data')
+ extractor.copy_dir(test_files, self.outdir)
+ print('Copied input test files into here:', os.path.abspath(self.outdir))
+
+
+ def _run_iva(self):
+ os.chdir(self.outdir)
+ cmd = self.iva_script + ' --threads ' + str(self.threads)
+ if self.trimmo_jar:
+ cmd += ' --trimmomatic ' + self.trimmo_jar
+
+ cmd += ' --pcr_primers hiv_pcr_primers.fa -f reads_1.fq.gz -r reads_2.fq.gz iva.out'
+
+ print('Current working directory:', os.getcwd())
+ print('Running iva on the test data with the command:', cmd, sep='\n')
+ iva.common.syscall(cmd)
+
+
+ def _check_output(self):
+ print('Finished running iva')
+ expected_contigs_file = os.path.abspath(os.path.join('iva.out', 'contigs.fasta'))
+ if os.path.exists(expected_contigs_file):
+ print('Looks OK. Final output contigs file is:', expected_contigs_file)
+ else:
+ print('Something went wrong! Final output contigs file not found:', expected_contigs_file)
+
+
+ def run(self):
+ self._copy_input_files()
+ self._run_iva()
+ self._check_output()
+
diff --git a/iva/test_run_data/hiv_pcr_primers.fa b/iva/test_run_data/hiv_pcr_primers.fa
new file mode 100644
index 0000000..90d2288
--- /dev/null
+++ b/iva/test_run_data/hiv_pcr_primers.fa
@@ -0,0 +1,36 @@
+>Pan-HIV-1_1F.1
+AGCCCGGGAGCTCTCTG
+>Pan-HIV-1_1F.2
+AGCCTGGGAGCTCTCTG
+>Pan-HIV-1_1R.1
+CCTCCAATTCCCCCTATCATTTT
+>Pan-HIV-1_1R.2
+CCTCCAATTCCTCCTATCATTTT
+>Pan-HIV-1_2F.1
+GGGAAGTGACATAGCAGGAAC
+>Pan-HIV-1_2F.2
+GGGAAGTGACATAGCTGGAAC
+>Pan-HIV-1_2F.3
+GGGAAGTGATATAGCAGGAAC
+>Pan-HIV-1_2F.4
+GGGAAGTGATATAGCTGGAAC
+>Pan-HIV-1_2R.1
+CTGCCATCTGTTTTCCATAATC
+>Pan-HIV-1_2R.2
+CTGCCATCTGTTTTCCATAGTC
+>Pan-HIV-1_3F
+TTAAAAGAAAAGGGGGGATTGGG
+>Pan-HIV-1_3R.1
+TGGCCTGTACCGTCAGCG
+>Pan-HIV-1_3R.2
+TGGCTTGTACCGTCAGCG
+>Pan-HIV-1_4F
+CCTATGGCAGGAAGAAGCG
+>Pan-HIV-1_4R.1
+CTTATATGCAGCATCTGAGGG
+>Pan-HIV-1_4R.2
+CTTATATGCAGCTTCTGAGGG
+>Pan-HIV-1_4R.3
+CTTTTATGCAGCATCTGAGGG
+>Pan-HIV-1_4R.4
+CTTTTATGCAGCTTCTGAGGG
diff --git a/iva/test_run_data/iva_contigs_no_trimmomatic.fasta b/iva/test_run_data/iva_contigs_no_trimmomatic.fasta
new file mode 100644
index 0000000..9ab1fa0
--- /dev/null
+++ b/iva/test_run_data/iva_contigs_no_trimmomatic.fasta
@@ -0,0 +1,167 @@
+>contig.00001
+GCTAGCAAGGGAACCCACTGCTTAAAGCCTCAATAAAGCTTGCCTTGAGTGCTTAAAGTG
+GTGTGTGCCCGTCTGTGTTAGGACTCTGGTAACTAGAGATCCCTCAGACCACTCTAGACT
+GAGTAAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGTTAATAGGGA
+CTCGAAAGCGAAAGTTCCAGAGAAGATCTCTCGACGCAGGACTCGGCTTGCTGAGGTGCA
+CACAGCAAGAGGCGAGAGCGGCGACTGGTGAGTACGCCAAATTTTGACTAGCAGAGGCTA
+GAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGGAGGAAAATTAGATGCATGGG
+AAAAAATTCGGCTACGGCCAGGGGGAAAGAAAAAGTATAGGCTGAAACATTTAGTATGGG
+CAAGCAGAGAGTTGGAAAGATTCGCAATTAACCCTGGCCTTTTAGAATCAGCAGAAGGAT
+GTCAACAAATAATAGAACAGTTACAGCCAACTCTCAAGACAGGATCAGAAGAACTTAAAT
+CTTTATATAATACAGTAGCAACCCTCTATTGTGTACATCAAAGGATAGGGGTAAAAGACA
+CCAAGGAAGCTCTAGATAAAATAGAGGAAATACAAAATAAGAGCCAGCAAAAGACACAGC
+AGGCAGCCGCTAGCACAGGAAGCAGCGGCAAAGTCAGTCAAAATTACCCTATAGTGCAAA
+ATGCACAAGGGCAAATGACACATCAGTCTTTATCACCCAGAACTTTGAATGCATGGGTGA
+AAGTAGTAGAAGAAAAGGGTTTTAACCCAGAAGTAATACCCATGTTCTCAGCATTATCAG
+AGGGAGCCACCCCACAAGATTTAAATATGATGCTAAATATAGTGGGGGGACACCAGGCAG
+CAATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGGATACACC
+CAGTACATGCAGGGCCTATTCCACCAGGCCAAATGAGGGAACCAAGGGGAAGTGACATAG
+CAGGAACTACTAGTACCCTTCAAGAACAAATAGGATGGATGACAAGCAATCCACCTATCC
+CAGTGGGAGACATCTATAAAAGATGGATAATCCTGGGATTGAATAAAATAGTAAGAATGT
+ATAGCCCTGTTAGCATTTTGGATATAAAACAAGGGCCAAAAGAGCCCTTCAGAGACTATG
+TGGATAGGTTCTATAGAACTCTCAGAGCGGAGCAAGCTACACAGGAGGTAAAAAATTGGA
+TGACAGAAACCTTACTAGTCCAAAATGCGAATCCAGACTGTAAGTCCATTTTAAAAGCAT
+TAGGAACAGGAGCTACATTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCTA
+GCCACAAAGCAAGGGTTTTGGCTGAGGCAATGAGCCAAGCACAACATACAAATATAATGA
+TGCAGAGAGGCAATTTTAAGGGCCAGAAAAGAATTAAGTGTTTCAACTGTGGCAAAGAAG
+GACACCTAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGTTGTTGGAAATGTGGGAAGG
+AAGGACATCAAATGAAAGACTGCACTGAAAGACAGGCTAATTTTTTAGGGAGAATTTGGC
+CTTCCAACAGAGGAAGGCCAGGGAATTTTCCTCAGAGCAGAACAGAGCCAACAGCCCCAC
+CAGCAGAGAATTGGGGGATGGGGGAAGAGACAACCTCCTTACTGAAGCAGGAGCAGAAGG
+ACAAGGAACATCCTCCTCCCTCTCCTCCCTCAATTTCCCTCAAATCACTCTTTGGCAGCG
+ACCCCTTGTCACAGTAAAAATAGGGGGACAGCTGAAAGAAGCTCTATTAGATACAGGAGC
+AGATGATACAGTATTAGAAGATATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGG
+GGGAATTGGAGGTTTTATCAAGGTAAAGCAGTATGATCAGATATGTATAGAAATTTGTGG
+AAAAAAGGCTATAGGTACAGTACTAGTAGGACCTACACCTGTCAACATAATTGGACGAAA
+TATGTTGACTCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTATTGAAACTGTACC
+AGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAGTGGCCATTGACAGAAGA
+AAAAATAAAAGCATTAACAGAAATTTGTAATGATATGGAAAAGGAAGGAAAAATCTCAAA
+AATTGGGCCTGAAAACCCATATAATACTCCAATATTTGCTATAAAGAAAAAGGACAGCAC
+CAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAGGACTTTTGGGA
+AGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACT
+AGATGTGGGAGATGCATATTTTTCAGTACCTTTAGATGAAAACTTTAGAAAGTATACTGC
+ATTCACCATACCTAGTAGAAACAATGAGACACCAGGAATCAGATATCAGTACAATGTGCT
+GCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGA
+TCCTTTTAGAAAACAAAATCCAGACATAGTTATTTATCAATACATGGATGATTTGTATGT
+AGGATCTGACTTAGAGATAGAGCAGCATAGAACAAAAATAGAGGAACTAAGACACCATCT
+GTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTG
+GATGGGGTATGAACTCCATCCGGATAAGTGGACAGTACAGCCTATAAAGCTGCCAGAAAA
+AGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGCCA
+GATTTATCCAGGGATTAAAGTAAAACAATTATGTAAACTCATTAGGGGGACCAAAACACT
+AACAGAAGTAGTACCACTAACAAAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGAT
+TCTAAAAGAACCAGTACATGGAACATATTATGACCCATCAAAAGACTTAATAGCAGAAAT
+ACAGAAGCAGGGAAATGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCT
+GAAAACAGGAAAGTATGCAAAAATAAGGGGAGCCCACACCAATGATGTAAAACTATTAAC
+AGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCTAGATT
+TAGATTACCCATACAAAGAGAAACATGGGACACATGGTGGACGGAATATTGGCAGGCTAC
+CTGGATTCCTGAATGGGAATTTGTTAATACCCCTCCTCTAGTAAAATTATGGTACCAATT
+AGAAAAAGACCCCATAGTAGGAGCAGAGACTTTCTATGTAGATGGGGCAGCTAGTAGGGA
+GACTAAGCTAGGAAAAGCAGGGTATGTCACTGACAGAGGAAGACAAAAGGTAGTTTCCCT
+AACTGAGACAACAAATCAGAAGACTGAATTGCATGCGATCCATTTAGCCTTGCAGGATTC
+AGGATCAGAAGTAAATATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACA
+ACCAGACATGAGTGAATCAGAAGTAGTCAACCAAATAATAGAGGAGCTAATAAAAAAGGA
+AAGAGTCTACCTGTCATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGA
+TAAACTAGTCAGTTCAGGAATCAGGAAGGTGCTATTTTTAGATGGGATAGACAAAGCTCA
+AGAAGATCATGAAAGATATCACAGCAATTGGAGAACAATGGCTAGTGATTTTAATTTGCC
+ACCTATAGTAGCAAAGGAAATAGTAGCCAACTGTGATAAATGTCAACTGAAAGGGGAAGC
+TATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGCACACATCTAGA
+AGGAAAAGTCATCCTGGTAGCAGTCCACGTGGCCAGTGGATATATAGAAGCAGAAGTTAT
+CCCAGCAGAAACAGGACAGGAGACAGCATACTTTCTGCTAAAATTAGCAGGAAGATGGCC
+AGTAAAAGTAATACACACAGACAATGGTAGCAATTTCACCAGCACTGCAGTTAAAGCAGC
+CTGTTGGTGGGCCAATGTCCAACAGGAATTTGGGATTCCCTACAATCCCCAAAGTCAAGG
+AGTAGTAGAATCTATGAATAGGGAATTAAAGAAAATCATAGGGCAGGTAAGAGAGCAAGC
+TGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGG
+GGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACA
+AACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGA
+CAGCAGAGACCCAATTTGGAAAGGACCAGCAAAGCTACTCTGGAAAGGTGAAGGGGCAGT
+AGTAATACAAGACAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATTATTAG
+GGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAAGATTA
+AAACATGGAACAGTCTAGTAAAATATCATATGTATAGATCAAAGAAAGCTAAAGAGTGGT
+TTTATAGACATCATTATGAAAGCCAGAATCCAAAGGTAAGTTCAGAAGTACATATCCCAC
+TAGGAGAGGCTAGATTAATAATAAGAACATATTGGGGTCTGCAGACAGGAGAAAGGGACT
+GGCATTTGGGTCATGGGGTCTCCATAGAATGGAGTCAGAGAAATTATAGCACACAAATAG
+ATCCTGACCTAGCAGACCAACTGATTCATCTACAATATTTTGACTGTTTTTCAGACTCTG
+CCATAAGGAAAGCCATATTAGGACAAGTAGTTAGTCATAGGTGTGAATATCCATCAGGAC
+ATAACAAGGTAGGATCCCTACAATATTTGGCACTGAAAGCATTAGCAACACCAAAAAAGA
+TAAGGCCACCTCTGCCTAGTGTTAAGAAATTAACAGAAGATAGATGGAACGAGCCCCAGA
+AGATCAGGGGCCACAGAGAGAACCCAACAATGAATGGACATTAGAACTATTAGAGGAGCT
+TAAAAATGAAGCTGTCAGACATTTCCCTAGGCTCTGGCTCCATGGCTTAGGACAGCACAT
+CTATGACACTTATGGGGATACTTGGGAAGGGGTTGAAGCTATAATAAGAACTTTGCAACA
+ACTACTGTTTGTTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATACCAGG
+GAGAAGAGGCAGGAATGGAGCCAGTAGATCCTAACCTAGAGCCCTGGAAGCATCCGGGGA
+GTCAGCCTACAACTGCTTGTACCAATTGCTACTGTAAAAAATGTTGCTGGCATTGCCAAC
+TATGCTTTCTGAAAAAAGGCTTAGGCATCTCCCATGGCAGGAAGAAGCGGAAACACCGAC
+GAGGAACTCCTCGCAGCAGTAAGGACCATCAAAATCCTATACCAGAGCAGTAAGTACTAA
+ATATATGTAATGCAAGCTTTAGCTATATTAGCAATAGTAGGATTAGTATTAGCAGGAATA
+ATAGCAATAGTTGTGTGGACTATAGTGTTCATAGAATATAGGAAAATAAGAAAACAAAAG
+AAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGAAATGAGAGT
+GACGGGGACACAGATGACTTGGCCAAGCTTTTGGAAATGGGGGACCTTGATCCTTGGGTT
+GGTGATAATTTGTAGTGCTGCAGAACAATTGTGGGTTACAGTTTATTATGGGGTTCCTGT
+GTGGAGAGATGCAGATACCACCCTATTTTGTGCATCAGATGCTAAAGGACATGAGACAGA
+AATGCACAATGTCTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAAT
+ACTCATGGAGAATGTAACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAGCAGAT
+GCATGAGGATGTAATCAGTTTATGGGATCAAAGTCTAAAACCATGTGTAAAGTTAACTCC
+TCTCTGTGTTACTTTAAATTGTACGAAGGTCAATATGGCCGGAGTCAATATTACTGACAC
+TGACAATATCACCAACATAATAGGAAATGGAACAGAGGAAGTAAGAAACTGTTCTTTTAA
+TATGACCACAGAACTAAATGATAAGCAGCGGAAGATTCATGCACTTTTTTATAAGCTTGA
+TATAGTATCAATGTATAATGATAATAGTAGTTATAGGTTAATAAATTGTAATACTTCAGT
+CATTAAGCAGGCTTGTCCAAAGGTATCCTTTGATCCAATTCCTATACATTATTGTACTCC
+AGCTGGTTATGCGATTTTAAAGTGTAATGATAAGAATTTCAATGGGACAGGGCCATGTAA
+AAATGTCAGCTCAGTACAATGCACACATGGAATTAAGCCAGTGGTATCAACTCAATTGCT
+GTTAAATGGAAGTCTAGCAGAAGAAGAGATAATAATCAGATCTGAAAATCTCACAGATAA
+TACAAAAACCATAATAGTGCACCTTAATACATCTGTACAAATTAATTGTACCAGACCCTC
+TAACAATACAAGAACAAGTGCAAGTATAGGACCAGGACAAGTATTATTCTATAGACCAGG
+AGAAGTAATAGGAAATATAAGAAAAGCATATTGTAATATTAGTGGAACAGCATGGAGGAA
+AGTCTTAGAACAGGTAACTGGAAAACTAAAAGAACACTTTAATAAGACAATAATCGTTGA
+ACCACACTCAGGAGGAGATCTAGAAATTACAACACATCACTTTAATTGTAGAGGGGAATT
+TTTTTATTGCAATACAACAAAACTGTTTACTAATAATTGCACAGATAACAGCACAGGGGG
+GTGTAATGATACTAATATCATAATTCCATGCAAGATAAGACAAATTGTACGCATGTGGCA
+AGGAGTAGGACAAGCAATGTACGCTCCTCCCATCAGTGGAGAAATTAAGTGTGTATCAAA
+TATTACAGGAATGCTATTGACAAGAGATGGTGGTAATACAACTAATGAGACCTTCAGACC
+TGGAGGAGGAAATATAAAGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACA
+AATTGATCCACTAGGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGGACAGAGAAAA
+AAGAGCAGTGGGAATAGGAGCTATGATCTTTGGGTTCTTAGGAGCAGCAGGAAGCACTAT
+GGGCGCGGCGTCAATAACGCTGACGGTACAGGCCAGAGAATTATTGTCTGGTATAGTGCA
+ACAGCAAAGCAATTTGCTGAGGGCTATAGAGGCGCAGCAGCATCTGTTGCAACTCACAGT
+CTGGGGCATTAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCA
+AAAGTTCCTAGGACTTTGGGGCTGCTCTGGAAAAATCATCTGTCCCACTGCTGTGCCCTG
+GAACACCTCTTGGAGTAATAAATCTTATGAAGAAATTTGGAACAACATGACATGGATAGA
+ATGGGAGAGAGAAATTAGCAATTACACAAGCCAAATATATGAGATACTTACAAAATCGCA
+GGACCAGCAGGATAGAAATGAAAAGGATTTGTTAGAATTGGACAATTGGGCAAGTCTGTG
+GACTTGGTTTGACATATCAAATTGGCTGTGGTATATAAGAATATTTATAATGATAGTAGG
+AGGTTTAATAGGTTTAAGAATAATTTTTGCTGTGCTTTCCATAGCGAATAGAGTTAGGCA
+GGGATACTCACCTCTGTCTTTCCAGACCCCTATCCAACTGCAGAGGGAACCCGACAGGCC
+CGAAGGAATCGAAGAAGGAGGTGGCGAGCAAGGCAGAGACAGATCCGTGAGATTAGTCAG
+CGGATTCTTGACTCTTGTCTGGGACGATCTACGGAGCCTGTTCCTCTTCCTCTACCACCG
+CTTGAGAGACTTCATCTTAATTGCAGCGAGGACTGTGGAACTTCTGGGACACAACAGTCT
+CAAGGGACTGAGACGGGGGTGGGAAGGCCTCAAATACCTGGGGAATCTTCTGTTGTATTG
+GGGCCAGGAACTAAAAACTAGTGCTATTTCTTTGTTTAATGCTACAGCAATAGCAGTAGG
+GGGGTGGACAGATAGACTTATAGAAGTAGCGCAAAGAGCTTGGAGAGCCCTTCTCCACAT
+ACCTAGAAGAATCAGACAGGGCTTAGAAAGGGCTTTGCTATAACATGGGAGGCAAGTGGT
+CAAAAAGTAGCATAGTGGGGTGGCCTCAGGTCAGGGAAAGATTAAGGAGAACAAACCCTC
+AAGCAACAGAAGGAGTAGGAGCAGTATCTCAAGATCTAGATAAACATGGAGCAGTAACAA
+GTACTAATATGAATAATGCAGATAGTGTCTGGCTGAGAGCACAAGAAGAAGATAACGAGG
+GGGTAGGCTTTCCAGTCAGGCCACAGGTACCTCTAAGACCAATGACTTTTAAGGGAGCAT
+TTGATCTTAGCTTCTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAGA
+AAAGACAAGAGATCATTGACTTATGGGTTTATAATACACAAGGCTACTTCCCTGATTGGC
+AAAACTACACACCAGGGCCAGGGGTCAGATACCCACTGTGTTTTGGATGGTGCTTCAAGT
+TAGTACCAGTTGACCCAAGTGAAGTAGAGGAGAACAACAAAGGAGAAAACAACTGCCTGC
+TACATCCCATGAGCCAGCATGGGCAAGAGGACGAGGAAAGAGAAGTGCTGATGTGGAAGT
+TTGACAGTGCCCTAGCACGAAAACACATAGCCCGAGAACAACATCCAGAGTACTATAAAG
+ACTGCTGACAAAGAAGTTTCTAACTAGGACTTCCGCTGGGGACTTTCCAGGGGAGGTGTG
+GCCGGGGCGGAGCTGGGGAGTGGTTAACCCTCAGAAGCTGCATAAAAGAGATCGGAAGAG
+CGGTTCAGCAGGAATGCCGAGACCGATCTCGGTCGTGTATCTCGTATGCCGTCTTCTGCT
+TGA
+>contig.00002
+GTGGAGAGATGCAGATACCACCCTATTTTGTGCATCAGATGCTAAAGGACATGAGACAGA
+AATGCACAATGTCTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAAT
+ACTCATGGAGAATGTAACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAGCAGAT
+GCATGAGGATGTAATCAGTTTATGGGATCAAAGTCTAAAACCATGTGTAAAGTTAACTCC
+TCTCTGTGTTACTTTACATTGTACCAATGTCACTAACATAGGAGGAGATGGAACAAAGGA
+AGTAAGAAACTGTTCTTTTAATATGACCACAGAACTAAAAGATAAGAAGCGGGAGATTCA
+TGCACTTTTTTATAGGCTTGATATAGTACCAGTTGATCCTAAGGCTAATAATAGTGAGTA
+TAGGTTAATAAATTGTAATACTTCAGTCATTAAGCAGGCTTGTCCAAAGGTATCCTTTGA
+TCCAATTCCTATACATTATTGCACTCCAGCTGGTTATGCGATTTTAAAGTGTAATGATAA
+GAATTTCAATGGGACAGGGCCATGTAAAAATGTCAGCTCAGTACAATGCACACATGGAAT
+TAAGCCAGTGGTATCAACTCAATTGCTGTTAAATGGAAGTCTAGCAGAAGAAGAGATAAT
+AATCA
diff --git a/iva/test_run_data/iva_contigs_with_trimmomatic.fasta b/iva/test_run_data/iva_contigs_with_trimmomatic.fasta
new file mode 100644
index 0000000..f3c3e05
--- /dev/null
+++ b/iva/test_run_data/iva_contigs_with_trimmomatic.fasta
@@ -0,0 +1,152 @@
+>contig.00001
+CTCTGGCTAGCAAGGGAACCCACTGCTTAAAGCCTCAATAAAGCTTGCCTTGAGTGCTTA
+AAGTGGTGTGTGCCCGTCTGTGTTAGGACTCTGGTAACTAGAGATCCCTCAGACCACTCT
+AGACTGAGTAAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGTTAAT
+AGGGACTCGAAAGCGAAAGTTCCAGAGAAGATCTCTCGACGCAGGACTCGGCTTGCTGAG
+GTGCACACAGCAAGAGGCGAGAGCGGCGACTGGTGAGTACGCCAAATTTTGACTAGCAGA
+GGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGGAGGAAAATTAGATGC
+ATGGGAAAAAATTCGGCTACGGCCAGGGGGAAAGAAAAAGTATAGGCTGAAACATTTAGT
+ATGGGCAAGCAGAGAGTTGGAAAGATTCGCAATTAACCCTGGCCTTTTAGAATCAGCAGA
+AGGATGTCAACAAATAATAGAACAGTTACAGCCAACTCTCAAGACAGGATCAGAAGAACT
+TAAATCTTTATATAATACAGTAGCAACCCTCTGGTGCGTACACCAAAGGACAGATGTAAA
+AGACACCAAGGAAGCTTTAGATAAAATAGAGGAAGCACAAAACAGGAACCAGCAAAAGAC
+ACAGCAGGCAGCCGCTAGCACAGGAAGCAGCAGCAACGTCAGCCAAAATTACCCTATAGT
+GCAAAATGCACAAGGGCAAATGACACATCAGTCTTTATCACCCAGAACTTTGAATGCATG
+GGTGAAAGTAGTAGAAGAAAAGGGTTTTAACCCAGAAGTAATACCCATGTTCTCAGCATT
+ATCAGAGGGAGCCACCCCACAAGATTTAAATATGATGCTAAATATAGTGGGGGGACACCA
+GGCAGCAATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGGAT
+ACACCCAGTACATGCAGGGCCTATTCCACCAGGCCAAATGAGGGAACCAAGGGGAAGTGA
+CATAGCAGGAACTACTAGTACCCTTCAAGAACAAATAGGATGGATGACAAGCAATCCACC
+TATCCCAGTGGGAGACATCTATAAAAGATGGATAATCCTGGGATTGAATAAAATAGTAAG
+AATGTATAGCCCTGTTAGCATTTTGGATATAAAACAAGGGCCAAAAGAGCCCTTCAGAGA
+CTATGTGGATAGGTTCTATAGAACTCTCAGAGCGGAGCAAGCTACACAGGAGGTAAAAAA
+TTGGATGACAGAAACCTTACTAGTCCAAAATGCGAATCCAGACTGTAAGTCCATTTTAAA
+AGCATTAGGAACAGGAGCTACATTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGG
+ACCTAGCCACAAAGCAAGGGTTTTGGCTGAGGCAATGAGCCAAGCACAACATACAAATAT
+AATGATGCAGAGAGGCAATTTTAAGGGCCAGAAAAGAATTAAGTGTTTCAACTGTGGCAA
+GGAAGGACACCTAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGTTGTTGGAAATGTGG
+GAAGGAAGGACATCAAATGAAAGACTGCACTGAAAGACAGGCTAATTTTTTAGGGAGAAT
+TTGGCCTTCCAACAGAGGAAGGCCAGGGAATTTTCCTCAGAGCAGAACAGAGCCAACAGC
+CCCACCAGCAGAGAATTGGGGGATGGGGGAAGAGACAACCTCCTTACTGAAGCAGGAGCA
+GAAGGACAAGGAACATCCTCCTCCCTCTCCTCCCTCAATTTCCCTCAAATCACTCTTTGG
+CAGCGACCCCTTGTCACAGTAAAAATAGGGGGACAGCTGAAAGAAGCTCTATTAGATACA
+GGAGCAGATGATACAGTATTAGAAGAAATAAATTTGCCAGGAAAATGGAAACCAAAAATG
+ATAGGGGGAATTGGAGGTTTTATCAAGGTAAAGCAGTATGATCAGATATGTATAGAAATT
+TGTGGAAAAAAGGCTATAGGTACAGTACTAGTAGGACCTACACCTGTCAACATAATTGGA
+CGAAATATGTTGACTCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTATTGAAACT
+GTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAGGTTAAACAGTGGCCATTAACA
+GAAGAAAAAATAAAAGCATTAACAGAAATTTGTAATGATATGGAAAAGGAAGGAAAAATC
+TCAAAAATTGGGCCTGAAAACCCATATAATACTCCAATATTTGCTATAAAGAAAAAGGAC
+AGCACCAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAGGACTTT
+TGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACA
+GTACTAGATGTGGGAGATGCATATTTTTCAGTACCTTTAGATGAAAACTTTAGAAAGTAT
+ACTGCATTCACCATACCTAGTAGAAACAATGAGACACCAGGAATCAGATATCAGTACAAT
+GTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATC
+TTAGATCCTTTTAGAAAACAAAATCCAGACATAGTTATTTATCAATACATGGATGATTTG
+TATGTAGGATCTGACTTAGAAATAGAGCAGCATAGAACAAAAATAGAGGAACTAAGACAC
+CATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTC
+CTTTGGATGGGGTATGAACTCCATCCGGATAAGTGGACAGTACAGCCTATAAAGCTTCCA
+GAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCA
+AGCCAGATTTATCCAGGGATTAAAGTAAAACAATTATGTAAACTCATTAGGGGGACCAAA
+ACACTAACAGAAGTAGTACCACTAACAAAAGAAGCAGAGCTAGAACTGGCAGAAAACAGA
+GAGATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCA
+GAAATACAGAAGCAGGGAAATGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAA
+AATCTGAAAACAGGAAAGTATGCAAAAATAAGGGGAGCCCACACCAATGATGTAAAACTA
+TTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCT
+AGATTTAGATTACCCATACAAAGAGAAACATGGGACACATGGTGGACGGAATATTGGCAG
+GCTACCTGGATTCCTGAATGGGAATTTGTTAATACCCCTCCTCTAGTAAAATTATGGTAC
+CAATTAGAAAAAGACCCCATAGTAGGAGCAGAGACTTTCTATGTAGATGGGGCAGCTAGT
+AGGGAGACTAAGCTAGGAAAAGCAGGGTATGTCACTGACAGAGGAAGACAAAAGGTAGTT
+TCCCTAACTGAGACAACAAATCAGAAGACTGAATTGCATGCGATCCATTTAGCCTTGCAG
+GATTCAGGATCAGAAGTGAATATAGTAACAGACTCACAATATGCATTAGGAATCATTCAA
+GCACAACCAGACATGAGTGAATCAGAAGTAGTCAACCAAATAATAGAGGAGCTAATAAAA
+AAGGAAAGAGTCTACCTGTCATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAA
+GTAGATAAACTAGTCAGTTCAGGAATCAGGAAGGTGCTATTTTTAGATGGGATAGACAAA
+GCTCAAGAAGATCATGAAAGATATCACAGCAATTGGAGAACAATGGCTAGTGATTTTAAT
+TTGCCACCTATAGTAGCAAAGGAAATAGTAGCCAACTGTGATAAATGTCAACTGAAAGGG
+GAAGCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGCACACAT
+CTAGAAGGAAAAGTCATCCTGGTAGCAGTCCACGTGGCCAGTGGATATATAGAAGCAGAA
+GTTATCCCAGCAGAAACAGGACAGGAGACAGCATACTTTCTGCTAAAATTAGCAGGAAGA
+TGGCCAGTAAAAGTAATACACACAGACAATGGTAGCAATTTCACCAGCACTGCAGTTAAA
+GCAGCCTGTTGGTGGGCCAATGTCCAACAGGAATTTGGGATTCCCTACAATCCCCAAAGT
+CAAGGAGTAGTAGAATCTATGAATAGGGAATTAAAGAAAATCATAGGGCAGGTAAGAGAG
+CAAGCTGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGA
+AAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGAC
+ATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTAC
+AGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCCAAACTACTCTGGAAAGGTGAAGGG
+GCAGTAGTAATACAAGACAATAGTGACATAAAAGTAGTACCAAGAAGAAAAGCAAAGATT
+ATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAA
+GATTAAAACATGGAACAGTCTAGTAAAATATCATATGTATAGATCAAAGAAAGCTAAAGA
+GTGGTTTTATAGACATCATTATGAAAGCCAGAATCCAAAGGTAAGTTCAGAAGTACATAT
+CCCACTAGGAGAGGCTAGATTAATAATAAGAACATATTGGGGTCTGCAGACAGGAGAAAG
+GGACTGGCATTTGGGTCATGGGGTCTCCATAGAATGGAGTCAGAGAAATTATAGCACACA
+AATAGATCCTGACCTAGCAGACCAACTGATTCATCTACAATATTTTGACTGTTTTTCAGA
+CTCTGCCATAAGGAAAGCCATATTAGGACAAGTAGTTAGTCATAGGTGTGAATATCCATC
+AGGACATAACAAGGTAGGATCCCTACAATATTTGGCACTGAAAGCATTAGCAACACCAAA
+AAAGATAAGGCCACCTCTGCCTAGTGTTAAGAAATTAACAGAAGATAGATGGAACGAGCC
+CCAGAAGATCAGGGGCCACAGAGAGAACCCAACAATGAATGGACATTAGAACTATTAGAG
+GAGCTTAAAAATGAAGCTGTCAGACATTTCCCTAGGCTCTGGCTCCATGGCTTAGGACAG
+CACATCTATGACACTTATGGGGATACTTGGGAAGGGGTTGAAGCTATAATAAGAACTTTG
+CAACAACTACTGTTTGTTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATA
+CCAGGGAGAAGAGGCAGGAATGGAGCCAGTAGATCCTAACCTAGAGCCCTGGAAGCATCC
+GGGGAGTCAGCCTACAACTGCTTGTACCAATTGCTACTGTAAAAAATGTTGCTGGCATTG
+CCAACTATGCTTTCTGAAAAAAGGCTTAGGCATCTCCCATGGCAGGAAGAAGCGGAAACA
+CCGACGAGGAACTCCTCGCAGCAGTAAGGACCATCAAAATCCTATACCAGAGCAGTAAGT
+ACTAAATATATGTAATGCAAGCTTTAGCTATATTAGCAATAGTAGGATTAGTATTAGCAG
+GAATAATAGCAATAGTTGTGTGGACTATAGTGTTCATAGAATATAGGAAAATAAGAAAAC
+AAAAGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGAAATG
+AGAGCGACGGGGACACAGATGACTTGGCCAAGCTTTTGGAAATGGGGGACCTTGATCCTT
+GGGTTGGTGATAATTTGTAGTGCTGCAGAACAATTGTGGGTTACAGTTTATTATGGGGTT
+CCTGTGTGGAGAGATGCAGATACCACCCTATTTTGTGCATCAGATGCTAAAGGACATGAG
+ACAGAAATGCACAATGTCTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAA
+GAAATACTCATGGAGAATGTAACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAG
+CAGATGCATGAGGATGTAATCAGTTTATGGGATCAAAGTCTAAAACCATGTGTAAAGCTA
+ACTCCTCTCTGTGTTACTTTAAATTGTACGAAGGTCAATATGGCCGGAGTCAATATTACT
+GACACTGACAATATCACCAACATAATAGGAAATGGAACAGAGGAAGTAAGAAACTGTTCT
+TTTAATATGACCACAGAACTAAATGATAAGCAGCGGCAGATTCATGCACTTTTTTATAAG
+CTTGATATAGTATCAATGTATAATGATAATAGTAGTTATAGGTTAATAAATTGTAATACT
+TCAGTCATTAAGCAGGCTTGTCCAAAGGTATCCTTTGATCCAATTCCTATACATTATTGT
+ACTCCAGCTGGTTATGCGATTTTAAAGTGTAATGATAAGAATTTCAATGGGACAGGGCCA
+TGTAAAAATGTCAGCTCAGTACAATGCACACATGGAATTAAGCCAGTGGTATCAACTCAA
+TTGCTGTTAAATGGAAGTCTAGCAGAAGAAGAGATAATAATCAGATCTGAAAATCTCACA
+GATAATACAAAAACCATAATAGTGCACCTTAATACATCTGTACAAATTAATTGTACCAGA
+CCCTCTAACAATACAAGAACAAGTGCAAGTATAGGACCAGGACAAGTATTATTCTATAGA
+CCAGGAGAAGTAATAGGAAATATAAGAAAAGCATATTGTAATATTAGTGGAACAGCATGG
+AGGAAAGTTTTAAAACAGGTAACTGAAAAACTAAAAGAACACTTTAATAAAACAATACAC
+GTTGAACCACACTCAGGAGGAGATCTAGAAATTACAACACATCACTTTAATTGTAGAGGG
+GAATTTTTTTATTGCAATACAACAAAACTGTTTACTAATAATTGCACAGATAACAGCACA
+GGGGGGTGTAATGATACTAATATCATAATTCCATGCAAGATAAGACAAATTGTACGCATG
+TGGCAAGGAGTAGGACAAGCAATGTACGCTCCTCCCATCAGTGGAGAAATTAAGTGTGAA
+TCAAATATTACAGGAATACTATTGACAAGAGATGGTGGTCATAATTCAACTAATGAGACC
+TTCAGACCTGAAGGAGGAAATATAAAGGACAATTGGAGAAGTGAATTATATAAATATAAA
+GTAGTACAAATTGATCCACTAGGAATAGCACCCACCAGGGCAAAAAGAAGAGTGGTGGAC
+AGAGAAAAAAGAGCAGTGGGAATAGGAGCTATGATCTTTGGGTTCTTAGGAGCAGCAGGA
+AGCACTATGGGCGCGGCGTCAATAACGCTGACGGTACAGGCCAGAGAATTATTGTCTGGT
+ATAGTGCAACAGCAAAGCAATTTGCTGAGGGCTATAGAGGCGCAGCAGCATCTGTTGCAA
+CTCACAGTCTGGGGCATTAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTA
+AAGGATCAAAAGTTCCTAGGACTTTGGGGCTGCTCTGGAAAAATCATCTGTCCCACTGCT
+GTGCCCTGGAACACCTCTTGGAGTAATAAATCTCATGACGAGATTTGGAACAACATGACA
+TGGATAGAATGGGAGAGAGAAATTAGCAATTACACAAGCCAAATATATGAGATACTTACA
+AAATCGCAGGACCAGCAGGATAGAAATGAAAAGGATTTGTTAGAATTGGACAATTGGGCA
+AGTCTGTGGACTTGGTTTGACATATCAAATTGGCTGTGGTATATAAGAATATTTATAATG
+ATAGTAGGAGGTTTAATAGGTTTAAGAATAATTTTTGCTGTGCTTTCCATAGCGAATAGA
+GTTAGGCAGGGATACTCACCTCTGTCTTTCCAGACCCCTATCCAACTGCAGAGGGAACCC
+GACAGGCCCGAAGGAATCGAAGAAGGAGGTGGCGAGCAAGGCAGAGACAGATCCGTGAGA
+TTAGTCAGCGGATTCTTGACTCTTGTCTGGGACGATCTACGGAGCCTGTTCCTCTTCCTC
+TACCACCGCTTGAGAGACTTCATCTTAATTGCAGCGAGGACTGTGGAACTTCTGGGACAC
+AACAGTCTCAAGGGACTGAGACGGGGGTGGGAAGGCCTCAAATACCTGGGGAATCTTCTG
+TTGTATTGGGGCCAGGAACTAAAAACTAGTGCTATTTCTTTGTTTAATGCTACAGCAATA
+GCAGTAGGGGGGTGGACAGATAGACTTATAGAAGTAGCGCAAAGAGCTTGGAGAGCCCTT
+CTCCACATACCTAGAAGAATCAGACAGGGCTTAGAAAGGGCTTTGCTATAACATGGGAGG
+CAAGTGGTCAAAAAGTAGCATAGTGGGGTGGCCTCAGGTCAGGGAAAGATTAAGGAGAAC
+AAACCCTCAAGCAACAGAAGGAGTAGGAGCAGTATCTCAAGATCTAGATAAACATGGAGC
+AGTAACAAGTACTAATATGAATAATGCTGATAGTGTCTGGCTGAGAGCACAAGAAGAAGA
+TAACGAGGGGGTAGGCTTTCCAGTCAGGCCACAGGTACCTCTAAGACCAATGACTTTTAA
+GGGAGCATTTGATCTTAGCTTCTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTA
+CTCCCAGAAAAGACGAGAGATCCTTGACTTATGGGTTTATAATACACAAGGCTACTTCCC
+TGATTGGCAAAACTACACACCAGGGCCAGGGGTCAGATACCCACTGTGTTTTGGATGGTG
+CTTCAAGTTAGTACCAGTTGACCCAAGCGAAGTAGAGGAGAACAACAAAGGAGAAAACAA
+CTGCCTGCTACATCCCATGAGCCAGCATGGGCAAGAGGACGAGGAAAGAGAAGTGCTGAT
+GTGGAAGTTTGACAGTGCCCTAGCACGAAAACACATAGCCCGAGAACAACATCCAGAGTA
+CTATAAAGACTGCTGACAAAGAAGTTTCTAACTAGGACTTCCGCTGGGGACTTTCCAGGG
+GAGGTGTGGCCGGGGCGGAGCTGGGGAGTGGTTAA
diff --git a/iva/test_run_data/reads_1.fq.gz b/iva/test_run_data/reads_1.fq.gz
new file mode 100644
index 0000000..8f7a52a
Binary files /dev/null and b/iva/test_run_data/reads_1.fq.gz differ
diff --git a/iva/test_run_data/reads_2.fq.gz b/iva/test_run_data/reads_2.fq.gz
new file mode 100644
index 0000000..8e74d0c
Binary files /dev/null and b/iva/test_run_data/reads_2.fq.gz differ
diff --git a/iva/test_run_data/reference.fasta b/iva/test_run_data/reference.fasta
new file mode 100644
index 0000000..acda52b
--- /dev/null
+++ b/iva/test_run_data/reference.fasta
@@ -0,0 +1,152 @@
+>DQ234790
+cgaacagggacttgaaagcgaaagttaatagggactcgaaagcgaaagttccagagaagt
+tctctcgagcgcaggactcggcttgctgaggtgcacacagcaagaggcgagagcggcgac
+tggtgagtacgccaaattttgactagcggaggctagaaggagagagatgggtgcgagagc
+gtcaatattaacaggggaaaaattagatgcatgggaaaaaattcggttacggccaggggg
+aaagaaaaaatatatgataaaacatctagtatgggcaagcagagagttggaaagattcgc
+acttaaccctggccttttagaaacagcggaaggatgtcaacagataatagaacagttaca
+gtcaactctcaagacaggatcagaagaacttaaatcattatttaatacagtagcaaccct
+ctggtgcgtacaccaaaggatagaggtaaaagacaccaaggaagctttagataaattaga
+ggaaatacaaaataagaaccagaaaaagacacagcaggcagcagctggcacaggaagcaa
+cagcaaagtcagccaaaattaccctatagtgcaaaatgcacaaggacaaatgatacatca
+gtctttatcacctagaactttgaatgcatgggtgaaagtagtagaagaaaagggctttaa
+cccagaagtaatacccatgttctcagcattatcagagggagccgctccacaagatttaaa
+tatgatgctaaatatagtggggggacaccaggcagcaatgcaaatgttaaaagaaaccat
+caatgaggaagctgcagaatgggatagggtacacccagtacatgcagggcctattccacc
+aggccaaatgagggaaccaaggggaagtgacatagcaggaaccactagtacccttcaaga
+acaaataggatggatgacaagcaatccacytatcccagtgggagacatctataaaaggtg
+gataattctgggattaaataaaatagtaagaatgtatagccctgttagcattttggacat
+aagacaagggccaaaagaacccttcagagactatgtagataggttctataaaactctcag
+agcggaacaagctacacaggaagtaaaaaattggatgacagaaaccttgctagtccaaaa
+tgcgaatccagactgtaagtccattttaaaagcattaggagcaggagctactttagaaga
+aatgatgacagcatgccagggagtgggaggacctagccataaagcaagggttttggctga
+ggcaatgaaccaagcacaacagacaactgtaatgatgcagagaggcaattycaagggcca
+gaaaagaattaagtgcttcaactgtggcagggaaggacacctagccagaaattgcagggc
+ccctagaaaaaagggttgttggaaatgcgggaaggaaggacatcaaatgaaagactgcac
+tgagagacaggctaattttttagggaaaatttggcctcccaacaaggggaggccagggaa
+ttttcctcagagcagaccagagccttcagccccaccagcggaaaactggagggagataac
+ctccttactgaagcaggagcagaaggacaaggaacacccttctccttcaatctccctcaa
+atcactctttggcaacgaccccttgtcacagtaaaaataggaggacagctaaaagaagct
+ctattagatacaggagcagatgatacagtattagaagatataaatttgccaggaaaatgg
+aaaccaaaaatgatagggggaattggaggttttatcaaagtaagacaatatgatcagata
+cttatagaaatttgtggaaaaaaggctataggtacagtattagtaggacctacacctgtc
+aacataattgggcgaaatatgttgactcagattggctgtactttaaatttcccaatcagt
+cctattgacactgtaccagtaaaattaaagccaggaatggatggaccaaaggttaaacag
+tggccattgacagaagaaaaaataaaagcattaacagaaatttgtaaagagatggaagag
+gaaggaaagatctcaaaaattgggcctgaaaatccatacaatactccagtatttgctata
+aagaaaaaggacagcaccaaatggaggaaattagtagatttcagagagctcaataaaaga
+acccaggacttttgggaaattcaattaggaataccacacccagcaggtttaaaaaagaaa
+aaatcagtaacagtactagatgtgggagatgcatatttttcagttccattagataaayat
+tttagaaagtatacagcattcaccatacctagtataaacaatgagacaccaggaatcaga
+tatcagtacaatgtgctgccacagggatggaaaggatcaccggcaatattccagagtagc
+atgacaaaaatcttagaaccttttagagcaaacaatccagaaataattatctatcaatac
+atggatgacttgtatgtaggatctgacttagaaataggacagcatagaataaaaatagag
+gagctgagagctcatttattaagctggggatttactacaccagacaaaaagcatcagaag
+gaacctccattcctttggatggggtatgaactccatcctgacagatggacagtccagcct
+atagaactgccagaaaaagacagctggactgtcaatgatatacagaaattagtgggaaaa
+ctaaattgggcaagtcaaatttatgcagggattaagataaagcaattgtgtagactcctc
+aggggagctaaagcactaacagacgtagtaccactgactgaagaagcagaattagaattg
+gcagagaacagggagattctaaaaacccctgtgcatggagtatattatgacccatcaaaa
+gacttagtagcagaagtacagaagcaaggacaagaccaatggacatatcaaatttatcaa
+gagccatttaaaaatctaaaaacaggaaaatatgcaagaaaaaggtctgctcacactaat
+gatgtaagacaattagcagaagtggtgcaaaaaatagtcacagaaagcatagtaatatgg
+ggaaaggcccctaaatttaaactacccatacaaagagaaacatgggaaacatggtggatg
+gagtattggcaggctacctggattcctgaatgggagtttgtcaatacccctcctctagta
+aaattatggtaccaattagaaaaagaccccatagtgggagcagaaaccttctatgtagat
+ggggcagctagtagggaaactaagctaggaaaagcagggtatgtcactgacagaggaaga
+caaaaggtagtttccctaactgaaacaacaaatcaaaagactgagttacatgcaatctat
+ttagccttgcaagattcaggatcagaagtaaatatagtaacagactcacaatatgcatta
+ggaatcattcaggcacaaccagacaggagtgaatcagaaatagttagccaaataatagag
+gagctaataaaaaaggaaaaagtctacctgtcatgggtaccagcacataaagggattgga
+ggtaataacaaagtagataaattagtcagttcaggaatcaggaaagtgctatttttaaat
+gggatagataaggctcaagaagaacatgaaagatatcacagcaattggagaacaatggct
+agtgattttaatttgccacctatagtagcaaaggaaatagtagccaactgtgataaatgt
+caactaaaaggggaagctatgcatggacaagtagattgtagtccagggatatggcaatta
+gattgcacacatctagagggaaaagtcatcctggtagcagtccacgtggccagtggatat
+atagaagcagaagttatcccagcagaaacaggacaggagacagcatacttcctgctaaaa
+ttagcaggaagatggccagtaaaagtcatacacacagacaatggtagcaatttcaccagc
+gctgcagttaaagcagcctgttggtgggccaatgtccgacaggaatttgggatcccctac
+aatccccaaagtcaaggagtagtagaatctatgaataaggacttaaagaaaatcataggg
+caggtaagagaacaagctgaacatcttaagacagcagtacaaatggcagtattcattcac
+aattttaaaagaaaaggggggattggggggtacagtgcaggggaaagaataatagacata
+atagcaacagacatacaaactaaagaattacaaaaacaaattacaaaaattcaaaatttt
+cgggtctattacagggacagcagagacccaatttggaaaggaccagcaaaactactctgg
+aaaggtgaaggggcagtagtaatacaagacaatagtgatataaaagtagtgccaagaaga
+aaagcaaaaatcattagggattatggaaaacagatggcaggtgatgattgtgtggcaggt
+agacaggatgaggattagaacatggaacagtttagtaaaacatcatatgtatgtctcaaa
+gaaagctaaaaagtggtattatagacatcattatgaaagccagcatccaaagataagctc
+agaagtacacatcccactaggagaggctagattagtaataaaaacatattggggtctgca
+gacaggagaaaaggactggcaattgggtcatggagtctccatagaatggagacagagaaa
+ctatagcacacaaatagatcctgaagtagcagaccgactgattcatctacaatattttga
+ctgttttgcagactctgccataaggagagccatactaggacaagtagttagatataagtg
+tgaatatccatcaggacataacaaggtaggatctctacaatacttggcactaagggcatt
+aacagggccaaaagggagcaggccgcctctgcccagtgtaaagaaattaacagaagatag
+atggagcgagccccagaagaccaggggccacagagagaaccctacaatgaatggacatta
+gaactattagaggagcttaaaaatgaagctgytagacattttcctrggccctggctccat
+agcttaggacagtacatctatgatacttatggggatacttgggaaggggttgaagctata
+acaagaactttgcagcaactactgtttgttcatttcagaattgggtgtcaacatagcaga
+ataggcattataccagggagaagaggcaggaatggagccagtagatcctaacctagagcc
+ctggaatcatccgggaagtaagcctacaaccgcttgtaccaagtgttactgtaaaatatg
+ttcctggcattgccaattatgctttctgaaaaaaggcttaggcatctcctatggcaggaa
+gaagcggaagcaccgacgaggaactcctcggagcagtgagggccatcaaaatcctgtacc
+aaagcagtaagtatttgtaaaataagtaaatgtaatgacacctcttcaaattagtgcaat
+agtaggactgatagtagcgctaatcttagcaatagtagtgtggactatagtaggtttaga
+agttaggaaaatactaaggcaaagaaaaatagataggttaattaagaaaataagagaaag
+agaagaagacagtggaaatgagagtgaaggagacacagatgaattggccaaacttgtgga
+gatgggggactttgatccttgggttggtgataatttgtagtgcctcagacaacttgtggg
+ttacagtttattatggggtccctgtgtggaaagatgcagataccaccctattttgtgcat
+cagatgccaaagcacatgagacagaagtgcacaatgtctgggccacgcatgcctgtgtac
+ccacagaccccaacccacaagaaataaaactgggagatgtaacagaaaattttaacatgt
+ggraaaataaaatggcagagcagatgcaggaggatgtaatcagtttatgggatcaaagcc
+taaagccatgtgtaaagttaactcctctctgtgttactttaaactgtacccaggctaatt
+ggaaatctaataacacaacccagaatataaatagctyggtcacaataggaaatatgacag
+atgaagtaagaaattgttcttttaatatgaccacagaactaacagataagcagcagaagg
+tctatgcacttttttataagcttgatatagtagaaattaataatagtacgtataggttaa
+ttaattgtaatacttcagtcattaagcaagcttgtccaaaggtatcctttgatccaattc
+ctatacattattgtactccagctggttatgtgattttaaagtgcaatgataaaaaattca
+gtgggacagggccatgtaacaatgtaagctcagtacaatgcacacatggaattaagccag
+tggtgtcaactcaattgctattaaatggcagcctagcagaagaagagataataattagat
+ctgaaaatttcacaaataatgccaaaaccataatagtgcaccttaatgaatctgtacaaa
+tcacttgtaccagaccctccaacaatacaagagaaagtgtgcgtataggaccaggacaag
+tattctatagaacaggagaaataacaggagatataaggaaagcatattgtcagattaatg
+caacaaaatgggaaaaagttttaaaacaggtagctaaaaaattaagagagcaatttaata
+agacaaacataagatttcaaccacactcaggaggagatctagaaattacaatgcatcatt
+ttaattgtaaaggggaatttttctattgcaatacaacacaactgtttgatagtagttgga
+atacaacaacaaccaatagggagaaccgtagtaatttcatacttccatgcaggataaaac
+aaattataaacatgtggcaggaaacaggaaaagcaatgtatgctcctcccatcaggggaa
+gcattcagtgtgtatcaaatattacaggaatactattgacaagagatggtggtaataata
+atgggtctaacgagaccgagacctttagacctggaggaggagatataagagacaattgga
+gaagtgaattatataaatataaagtagtacaaattgaaccactaggagtagcacccacca
+gggcaaagagaagagtggtggagagagaaaaaagagcagtagtgggaataggagctatga
+tctttgggttcttaggagcagcaggaagcactatgggcgcggcgtcattaacgctgacgg
+tacaggccagacaattactgtctggtatagtgcaacagcaaagcaatttgctgagggcta
+tagaggcgcaacagcatatgttgcaactcacagtctggggcattaaacagctccaggcaa
+gagtcctggctgtggaaagatacctaaaggatcaaaggttcctaggactttggggctgct
+ctgggaagatcatctgcaccactgctgtgccctggaacaacacttggagtaataaatctt
+atgaagaaatttggaacaacatgacatggacacaatgggagagagaaattagcaattaca
+cagaccaaatatatgctatacttacagaatcgcaaaaccagcaggacaaaaatgagaagg
+atttgttggaattggaccaatgggcaagtctgtggaattggtttagcataacaaagtggc
+tgtggtatataaaaacatttataatgatagtaggaggtttaataggattaagaataatct
+ttgctgtgctttctatagtgaatagagttaggcagggatactcacccttgtctttccaga
+tccctctccaccagcagagggaaccagacagacccggaagaatcgaagaagaaggtggcg
+ggcaagacagagacagatccgtaagattagtgagcggattcttagctctgttgtgggacg
+atctacggaacctgtgcctcttcagctaccatcgcttgagagacttcatcttgattgtaa
+cgaggactgtggaacttctgggacacagcagtctcaagggactgagactggggtgggaag
+gcctcaaatatctggggaatcttctgttatattgggggcaggaactaaaaattagtgcta
+tttctttgcttaatactacagcaatagcagtagcagagtggacagatagggttatagaag
+tagcacaaagagcttggagggctatccttcacatacctagaagaatccgacagggcttag
+aaaggactttggtataacatgggaggcaaatggtcaaaaagtagcatagtgggatggcct
+caggtcagagaaagaataaggcaaactcccccagcaacagaaggagtaggagcagtatct
+caagatctagataaacatggagcagtaacaagcaataatatgaataatgatgatagtgtc
+tggctgagagcacaagaggaagatgaggaaggggtaggctttccagtcaggccacaggta
+cctctaagaccaatgacttataaggacgcttttgatcttagcttctttttaaaagaaaag
+gggggactggatgggctaatttactccaagaaaagacaagagatccttgacttatgggtt
+tataacacacaaggcttcttccctgattggcagaactacacaccagggccagggattaga
+tatccactgtgttttggatggtgcttcaaactagtaccagttgacccaagagaagtagag
+gaggacaacaaaggagaaaacaactgcctgttgcaccccgcaagccagcatggaatagat
+gacgaagaaagagaagtgctgatgtggaagtttgacagtgccctagcacgaaaacaccta
+gcccgagaactgcatccagagttctataaagactgctgacaaagaagtttctaactagga
+cttccgctggggactttccaggggaggtgtggccggggcggagttggggagtggctaacc
+ctcagatgctgcataaaagcagccgctttgcgcttgtactgggtctctcttggtagacca
+ggtcgagcccgggagctctctggctagcaagggaacccactgcttagagcctcaataaag
+cttgccttgagtgcttgaagtggtgtgtgcccgtctgtgttaggactctggtaact
diff --git a/iva/tests/kcount_test.py b/iva/tests/kcount_test.py
index 1e33668..65ae02a 100644
--- a/iva/tests/kcount_test.py
+++ b/iva/tests/kcount_test.py
@@ -42,6 +42,14 @@ class TestKcount(unittest.TestCase):
os.unlink(counts_file)
+ def test_run_kmc_two_threads(self):
+ '''Test _run_kmc with two threads'''
+ reads = os.path.join(data_dir, 'kcount_test.run_kmc.fa')
+ counts_file = kcount._run_kmc(reads, 'tmp.run_kmc', 10, 2, 4, threads=2)
+ self.assertTrue(filecmp.cmp(counts_file, os.path.join(data_dir, 'kcount_test.run_kmc.counts'), shallow=False))
+ os.unlink(counts_file)
+
+
def test_kmc_to_kmer_counts(self):
'''Test _kmc_to_kmer_counts'''
counts = kcount._kmc_to_kmer_counts(os.path.join(data_dir, 'kcount_test.kmc_counts'), number=2)
diff --git a/scripts/iva b/scripts/iva
index af7eea2..1484839 100755
--- a/scripts/iva
+++ b/scripts/iva
@@ -63,10 +63,21 @@ trimming_group.add_argument('--pcr_primers', action=iva.common.abspathAction, he
other_group = parser.add_argument_group('Other options')
other_group.add_argument('-i', '--max_insert', type=int, help='Maximum insert size (includes read length). Reads with inferred insert size more than the maximum will not be used to extend contigs [%(default)s]', default=800, metavar='INT')
other_group.add_argument('-t', '--threads', type=int, help='Number of threads to use [%(default)s]', default=1, metavar='INT')
+other_group.add_argument('--kmc_onethread', action='store_true', help='Force kmc to use one thread. By default the value of -t/--threads is used when running kmc')
other_group.add_argument('--strand_bias', type=float, help='Set strand bias cutoff of mapped reads when trimming contig ends, in the interval [0,0.5]. A value of x means that a base needs min(fwd_depth, rev_depth) / total_depth <= x. The only time this should be used is with libraries with overlapping reads (ie fragment length < 2*read length), and even then, it can make results worse. If used, try a low value like 0.1 first [%(default)s]', default=0, metavar='FLOAT in [0,0.5]')
+other_group.add_argument('--test', action='store_true', help='Run using built in test data. All other options will be ignored, except the mandatory output directory, and --trimmomatic and --threads can be also be used')
other_group.add_argument('--version', action='version', version=iva.common.version)
options = parser.parse_args()
+
+if options.test:
+ print('Running iva in test mode...')
+ this_script = os.path.abspath(__file__)
+ tester = iva.test_data_runner.Tester(options.outdir, this_script, trimmo_jar=options.trimmomatic, threads=options.threads)
+ tester.run()
+ sys.exit()
+
+
if options.seed_stop_length == 0:
options.seed_stop_length = int(0.9 * options.max_insert)
@@ -92,6 +103,12 @@ if os.path.exists(options.outdir):
sys.exit(1)
+if options.kmc_onethread:
+ kmc_threads = 1
+else:
+ kmc_threads = options.threads
+
+
iva.external_progs.get_all_versions(iva.external_progs.assembly_progs)
try:
@@ -188,6 +205,7 @@ elif options.reference:
index_k = options.smalt_k,
index_s = options.smalt_s,
threads = options.threads,
+ kmc_threads = kmc_threads,
max_insert = options.max_insert,
minid = 0.9,
seed_stop_length = options.seed_stop_length,
@@ -213,6 +231,7 @@ assembly = iva.assembly.Assembly(
map_index_k = options.smalt_k,
map_index_s = options.smalt_s,
threads = options.threads,
+ kmc_threads = kmc_threads,
map_minid = options.smalt_id,
contig_iter_trim = options.ctg_iter_trim,
ext_min_cov = options.ext_min_cov,
diff --git a/setup.py b/setup.py
index 828fb03..dec5c89 100644
--- a/setup.py
+++ b/setup.py
@@ -32,10 +32,10 @@ if not found_all_progs:
setup(
name='iva',
- version='1.0.0',
+ version='1.0.4',
description='Iterative Virus Assembler',
packages = find_packages(),
- package_data={'iva': ['gage/*', 'ratt/*', 'read_trim/*']},
+ package_data={'iva': ['gage/*', 'ratt/*', 'read_trim/*', 'test_run_data/*']},
author='Martin Hunt',
author_email='path-help at sanger.ac.uk',
url='https://github.com/sanger-pathogens/iva',
@@ -43,9 +43,9 @@ setup(
test_suite='nose.collector',
tests_require=['nose >= 1.3'],
install_requires=[
- 'pyfastaq >= 3.0.1',
+ 'pyfastaq >= 3.10.0',
'networkx >= 1.7',
- 'pysam >= 0.8.1'
+ 'pysam >= 0.8.1, <= 0.8.3',
],
license='GPLv3',
classifiers=[
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/iva.git
More information about the debian-med-commit
mailing list