[med-svn] [iva] 03/05: Imported Upstream version 1.0.4

Sascha Steinbiss sascha at steinbiss.name
Thu Apr 28 22:04:57 UTC 2016


This is an automated email from the git hooks/post-receive script.

sascha-guest pushed a commit to branch master
in repository iva.

commit 0bb6f953ae62ed493a692729ce295d4a9b3f2c64
Author: Sascha Steinbiss <sascha at steinbiss.name>
Date:   Thu Apr 28 21:37:06 2016 +0000

    Imported Upstream version 1.0.4
---
 iva/__init__.py                                    |   1 +
 iva/assembly.py                                    |   5 +-
 iva/common.py                                      |   2 +-
 iva/external_progs.py                              |   8 +
 iva/gage/GetFastaStats$ContigAt.class              | Bin 489 -> 0 bytes
 iva/gage/GetFastaStats.class                       | Bin 9674 -> 0 bytes
 iva/gage/SizeFasta.class                           | Bin 3241 -> 0 bytes
 iva/gage/SplitFastaByLetter.class                  | Bin 2497 -> 0 bytes
 iva/gage/Utils$Pair.class                          | Bin 621 -> 0 bytes
 iva/gage/Utils$ToProtein.class                     | Bin 4565 -> 0 bytes
 iva/gage/Utils$Translate.class                     | Bin 1226 -> 0 bytes
 iva/gage/Utils.class                               | Bin 5321 -> 0 bytes
 iva/gage/getScaffoldStats$1.class                  | Bin 199 -> 0 bytes
 iva/gage/getScaffoldStats$Scaffold.class           | Bin 1232 -> 0 bytes
 iva/gage/getScaffoldStats.class                    | Bin 12187 -> 0 bytes
 iva/kcount.py                                      |  16 +-
 iva/kraken.py                                      |  21 +--
 iva/seed.py                                        |   7 +-
 iva/seed_processor.py                              |   7 +-
 iva/test_data_runner.py                            |  50 ++++++
 iva/test_run_data/hiv_pcr_primers.fa               |  36 +++++
 iva/test_run_data/iva_contigs_no_trimmomatic.fasta | 167 +++++++++++++++++++++
 .../iva_contigs_with_trimmomatic.fasta             | 152 +++++++++++++++++++
 iva/test_run_data/reads_1.fq.gz                    | Bin 0 -> 3813239 bytes
 iva/test_run_data/reads_2.fq.gz                    | Bin 0 -> 4661713 bytes
 iva/test_run_data/reference.fasta                  | 152 +++++++++++++++++++
 iva/tests/kcount_test.py                           |   8 +
 scripts/iva                                        |  19 +++
 setup.py                                           |   8 +-
 29 files changed, 629 insertions(+), 30 deletions(-)

diff --git a/iva/__init__.py b/iva/__init__.py
index 825a828..47ee547 100644
--- a/iva/__init__.py
+++ b/iva/__init__.py
@@ -17,5 +17,6 @@ __all__ = [
     'read_trim',
     'seed',
     'seed_processor',
+    'test_data_runner',
 ]
 from iva import *
diff --git a/iva/assembly.py b/iva/assembly.py
index 3a3a9ca..305c1d4 100644
--- a/iva/assembly.py
+++ b/iva/assembly.py
@@ -6,12 +6,13 @@ from iva import contig, mapping, seed, mummer, graph, edge, common
 import pyfastaq
 
 class Assembly:
-    def __init__(self, contigs_file=None, map_index_k=15, map_index_s=3, threads=1, max_insert=800, map_minid=0.5, min_clip=3, ext_min_cov=5, ext_min_ratio=2, ext_bases=100, verbose=0, seed_min_cov=5, seed_min_ratio=10, seed_min_kmer_count=200, seed_max_kmer_count=1000000000, seed_start_length=None, seed_stop_length=500, seed_overlap_length=None, make_new_seeds=False, contig_iter_trim=10, seed_ext_max_bases=50, max_contigs=50, clean=True, strand_bias=0):
+    def __init__(self, contigs_file=None, map_index_k=15, map_index_s=3, threads=1, kmc_threads=1, max_insert=800, map_minid=0.5, min_clip=3, ext_min_cov=5, ext_min_ratio=2, ext_bases=100, verbose=0, seed_min_cov=5, seed_min_ratio=10, seed_min_kmer_count=200, seed_max_kmer_count=1000000000, seed_start_length=None, seed_stop_length=500, seed_overlap_length=None, make_new_seeds=False, contig_iter_trim=10, seed_ext_max_bases=50, max_contigs=50, clean=True, strand_bias=0):
         self.contigs = {}
         self.contig_lengths = {}
         self.map_index_k = map_index_k
         self.map_index_s = map_index_s
         self.threads = threads
+        self.kmc_threads = kmc_threads
         self.max_insert = max_insert
         self.map_minid = map_minid
         self.min_clip = min_clip
@@ -583,7 +584,7 @@ class Assembly:
         made_seed = False
 
         for i in range(max_attempts):
-            s = seed.Seed(reads1=seed_reads1, reads2=seed_reads2, extend_length=self.seed_ext_max_bases, seed_length=self.seed_start_length, seed_min_count=self.seed_min_kmer_count, seed_max_count=self.seed_max_kmer_count, ext_min_cov=self.seed_min_cov, ext_min_ratio=self.seed_min_ratio, verbose=self.verbose, threads=self.threads, sequences_to_ignore=self.used_seeds, contigs_to_check=self.contigs)
+            s = seed.Seed(reads1=seed_reads1, reads2=seed_reads2, extend_length=self.seed_ext_max_bases, seed_length=self.seed_start_length, seed_min_count=self.seed_min_kmer_count, seed_max_count=self.seed_max_kmer_count, ext_min_cov=self.seed_min_cov, ext_min_ratio=self.seed_min_ratio, verbose=self.verbose, kmc_threads=self.kmc_threads, map_threads=self.threads, sequences_to_ignore=self.used_seeds, contigs_to_check=self.contigs)
 
             if s.seq is None or len(s.seq) == 0:
                 break
diff --git a/iva/common.py b/iva/common.py
index 5a8c804..271d35f 100644
--- a/iva/common.py
+++ b/iva/common.py
@@ -2,7 +2,7 @@ import argparse
 import os
 import sys
 import subprocess
-version = '1.0.0'
+version = '1.0.4'
 
 class abspathAction(argparse.Action):
     def __call__(self, parser, namespace, value, option_string):
diff --git a/iva/external_progs.py b/iva/external_progs.py
index a727111..8b42054 100644
--- a/iva/external_progs.py
+++ b/iva/external_progs.py
@@ -2,6 +2,7 @@ import shutil
 import subprocess
 import re
 import sys
+from distutils.version import LooseVersion
 import pyfastaq
 from iva import common
 
@@ -25,6 +26,11 @@ prog_to_version_cmd = {
     'samtools': ('samtools', re.compile('^Version: (.*)$')),
 }
 
+ 
+minimum_versions = {
+    'samtools': '0.1.19'
+}
+
 
 assembly_progs = [
     'kmc',
@@ -79,6 +85,8 @@ def get_all_versions(progs, must_be_in_path=True):
     info = []
     for prog in sorted(progs):
         version = get_version(prog, must_be_in_path=must_be_in_path)
+        if prog in minimum_versions and LooseVersion(version) < LooseVersion(minimum_versions[prog]):
+            raise Error('Found version ' + version + ' of ' + prog + ' but must be at least ' + minimum_versions[prog] + '. Cannot continue')
         info.append(' '.join(['Using', prog, 'version', version]))
     return info
 
diff --git a/iva/gage/GetFastaStats$ContigAt.class b/iva/gage/GetFastaStats$ContigAt.class
deleted file mode 100644
index 7b71d97..0000000
Binary files a/iva/gage/GetFastaStats$ContigAt.class and /dev/null differ
diff --git a/iva/gage/GetFastaStats.class b/iva/gage/GetFastaStats.class
deleted file mode 100644
index 11e678e..0000000
Binary files a/iva/gage/GetFastaStats.class and /dev/null differ
diff --git a/iva/gage/SizeFasta.class b/iva/gage/SizeFasta.class
deleted file mode 100644
index 4724b73..0000000
Binary files a/iva/gage/SizeFasta.class and /dev/null differ
diff --git a/iva/gage/SplitFastaByLetter.class b/iva/gage/SplitFastaByLetter.class
deleted file mode 100644
index 444915d..0000000
Binary files a/iva/gage/SplitFastaByLetter.class and /dev/null differ
diff --git a/iva/gage/Utils$Pair.class b/iva/gage/Utils$Pair.class
deleted file mode 100644
index c404bc5..0000000
Binary files a/iva/gage/Utils$Pair.class and /dev/null differ
diff --git a/iva/gage/Utils$ToProtein.class b/iva/gage/Utils$ToProtein.class
deleted file mode 100644
index c833220..0000000
Binary files a/iva/gage/Utils$ToProtein.class and /dev/null differ
diff --git a/iva/gage/Utils$Translate.class b/iva/gage/Utils$Translate.class
deleted file mode 100644
index 6842928..0000000
Binary files a/iva/gage/Utils$Translate.class and /dev/null differ
diff --git a/iva/gage/Utils.class b/iva/gage/Utils.class
deleted file mode 100644
index 66949ba..0000000
Binary files a/iva/gage/Utils.class and /dev/null differ
diff --git a/iva/gage/getScaffoldStats$1.class b/iva/gage/getScaffoldStats$1.class
deleted file mode 100644
index d6b14b7..0000000
Binary files a/iva/gage/getScaffoldStats$1.class and /dev/null differ
diff --git a/iva/gage/getScaffoldStats$Scaffold.class b/iva/gage/getScaffoldStats$Scaffold.class
deleted file mode 100644
index c1c69fb..0000000
Binary files a/iva/gage/getScaffoldStats$Scaffold.class and /dev/null differ
diff --git a/iva/gage/getScaffoldStats.class b/iva/gage/getScaffoldStats.class
deleted file mode 100644
index de57f0f..0000000
Binary files a/iva/gage/getScaffoldStats.class and /dev/null differ
diff --git a/iva/kcount.py b/iva/kcount.py
index 1b8a957..1146c58 100644
--- a/iva/kcount.py
+++ b/iva/kcount.py
@@ -52,14 +52,14 @@ def _median(d):
             return key
 
 
-def _run_kmc_with_script(script, reads, outfile, kmer, min_count, max_count, m_option, verbose, allow_fail):
+def _run_kmc_with_script(script, reads, outfile, kmer, min_count, max_count, m_option, verbose, allow_fail, threads=1):
     f = pyfastaq.utils.open_file_write(script)
     print('set -e', file=f)
     kmc_command = ''.join([
         'kmc -fa',
          ' -m', str(m_option),
          ' -k', str(kmer),
-         ' -sf', '1',
+         ' -sf', str(threads),
          ' -ci', str(min_count),
          ' -cs', str(max_count),
          ' -cx', str(max_count),
@@ -80,7 +80,7 @@ def _run_kmc_with_script(script, reads, outfile, kmer, min_count, max_count, m_o
     return common.syscall('bash ' + script, allow_fail=allow_fail)
 
 
-def _run_kmc(reads, outprefix, kmer, min_count, max_count, verbose=0):
+def _run_kmc(reads, outprefix, kmer, min_count, max_count, verbose=0, threads=1):
     '''Runs the kmer counting program kmc on a FASTA file. Returns filename made by kmc of the counts of kmers'''
     reads = os.path.abspath(reads)
     tmpdir = tempfile.mkdtemp(prefix='tmp.run_kmc.', dir=os.getcwd())
@@ -92,11 +92,11 @@ def _run_kmc(reads, outprefix, kmer, min_count, max_count, verbose=0):
     # The range is 4-32 (GB).
     # Try 4 and 32 (the default), then give up. This seems to make a difference, regardless of
     # RAM available on the machine.
-    ran_ok = _run_kmc_with_script('run_kmc.sh', reads, kmer_counts_file, kmer, min_count, max_count, 32, verbose, True)
+    ran_ok = _run_kmc_with_script('run_kmc.sh', reads, kmer_counts_file, kmer, min_count, max_count, 32, verbose, True, threads=threads)
     if not ran_ok:
         if verbose:
             print('First try of running kmc failed. Trying again with -m4 instead of -m32...', flush=True)
-        ran_ok = _run_kmc_with_script('run_kmc.sh', reads, kmer_counts_file, kmer, min_count, max_count, 4, verbose, False)
+        ran_ok = _run_kmc_with_script('run_kmc.sh', reads, kmer_counts_file, kmer, min_count, max_count, 4, verbose, False, threads=threads)
 
     os.chdir(original_dir)
     shutil.rmtree(tmpdir)
@@ -204,7 +204,7 @@ def _counts_file_to_fasta(infile, outfile):
     pyfastaq.utils.close(fout)
 
 
-def get_most_common_kmers(reads1, reads2, kmer_length=None, head=100000, min_count=10, max_count=100000000, most_common=100, method='kmc', verbose=0, ignore_seqs=None, contigs_to_check=None, threads=1):
+def get_most_common_kmers(reads1, reads2, kmer_length=None, head=100000, min_count=10, max_count=100000000, most_common=100, method='kmc', verbose=0, ignore_seqs=None, contigs_to_check=None, kmc_threads=1, map_threads=1):
     '''Gets the most common kmers from a pair of interleaved read FASTA or FASTQ files. Takes the first N sequences (determined by head).  Returns a dict of kmer=>frequency. If kmer length is not given, use min(0.8 * median read length, 95)'''
     tmpdir = tempfile.mkdtemp(prefix='tmp.common_kmers.', dir=os.getcwd())
     counts = {}
@@ -217,8 +217,8 @@ def get_most_common_kmers(reads1, reads2, kmer_length=None, head=100000, min_cou
         kmer_length = min(int(0.8 * _median(read_lengths)), 95)
 
     if method == 'kmc':
-        counts_file = _run_kmc(reads, os.path.join(tmpdir, 'out'), kmer_length, min_count, max_count, verbose=verbose)
-        counts = _kmc_to_kmer_counts(counts_file, most_common, kmers_to_ignore=ignore_seqs, contigs_to_check=contigs_to_check, verbose=verbose, threads=threads)
+        counts_file = _run_kmc(reads, os.path.join(tmpdir, 'out'), kmer_length, min_count, max_count, verbose=verbose, threads=kmc_threads)
+        counts = _kmc_to_kmer_counts(counts_file, most_common, kmers_to_ignore=ignore_seqs, contigs_to_check=contigs_to_check, verbose=verbose, threads=map_threads)
     else:
         raise Error('Method "' + method + '" not supported in kcount.get_most_common_kmers(). Cannot continue.')
 
diff --git a/iva/kraken.py b/iva/kraken.py
index 5b95b8b..fc5bbda 100644
--- a/iva/kraken.py
+++ b/iva/kraken.py
@@ -1,3 +1,4 @@
+import stat
 import inspect
 import sys
 import os
@@ -25,7 +26,7 @@ class Database:
         self.minimizer_len = minimizer_len
         self.max_db_size = max_db_size
         self.current_taxon_id = 2000000000
-        self.current_gi = 4000000000 
+        self.current_gi = 4000000000
         self.preload = preload
         self.verbose = verbose
         self.taxon_to_parent = {}
@@ -88,11 +89,11 @@ class Database:
                 'genbank_ids': genbank_ids,
                 'new_gis': new_gis,
             }
-            
+
             self.current_taxon_id += 1
         pyfastaq.utils.close(f)
 
-        
+
     def _download_from_genbank(self, outfile, filetype, gi, max_tries=5, delay=3):
         assert filetype in ['gb', 'fasta']
         file_ok = False
@@ -137,7 +138,7 @@ class Database:
         gi = None
         for line in f:
             if line.startswith('                     /db_xref="taxon:'):
-                taxon_id = line.rstrip().split(':')[-1].rstrip('"')            
+                taxon_id = line.rstrip().split(':')[-1].rstrip('"')
             elif line.startswith('VERSION'):
                 gi = line.rstrip().split()[-1].split(':')[-1]
             if None not in [taxon_id, gi]:
@@ -161,7 +162,7 @@ class Database:
         iva.common.syscall('grep -v CONTIG ' + infile + ' > tmp.gbk; mv tmp.gbk ' + infile)
         iva.common.syscall(genbank2embl + ' ' + infile + ' ' + outfile, verbose=self.verbose)
         shutil.rmtree(tmpdir)
-        
+
 
     def _append_to_file(self, filename, line):
         try:
@@ -199,7 +200,7 @@ class Database:
             ]) + '\t|'
             self._append_to_file(self.kraken_nodes_dmp, line)
         self.added_to_kraken.add(new_taxon)
-        self._append_to_file(self.kraken_gi_taxid_nucl_dmp, str(new_gi) + '\t' + str(new_taxon)) 
+        self._append_to_file(self.kraken_gi_taxid_nucl_dmp, str(new_gi) + '\t' + str(new_taxon))
         iva.common.syscall('kraken-build --add-to-library ' + fa_file + ' --db ' + self.kraken_db, verbose=self.verbose)
 
 
@@ -254,7 +255,7 @@ class Database:
                     print('unlink', os.path.exists(fa_file), fa_file)
                 os.unlink(gb_file)
                 os.unlink(fa_file)
-               
+
 
     def _build_kraken_virus_db(self):
         if os.path.exists(self.done_files['clean']):
@@ -270,7 +271,7 @@ class Database:
             iva.common.syscall('kraken-build --download-taxonomy --db ' + self.kraken_db, verbose=self.verbose)
             if not self.skip_virus_download:
                 iva.common.syscall('kraken-build --download-library viruses --db ' + self.kraken_db, verbose=self.verbose)
-        
+
             if self.extra_refs_file is not None:
                 self._load_extra_ref_info()
                 self._download_extra_refs()
@@ -299,7 +300,7 @@ class Database:
             if os.path.exists(self.extra_refs_dir):
                 shutil.rmtree(self.extra_refs_dir)
             iva.common.syscall('touch ' + self.done_files['clean'], verbose=self.verbose)
-             
+
 
     def _get_genbank_virus_files(self):
         if os.path.exists(self.done_files['make_embl']):
@@ -336,7 +337,7 @@ class Database:
                 print()
             new_dir = re.sub('_uid[0-9]+$', '', directory).strip('_')
             if new_dir != directory:
-                os.rename(directory, new_dir) 
+                os.rename(directory, new_dir)
 
             final_dir =  os.path.join(self.embl_root, os.path.basename(new_dir))
             if os.path.exists(final_dir):
diff --git a/iva/seed.py b/iva/seed.py
index 883b689..993e31f 100644
--- a/iva/seed.py
+++ b/iva/seed.py
@@ -8,13 +8,14 @@ from iva import kcount, kmers, mapping
 class Error (Exception): pass
 
 class Seed:
-    def __init__(self, extend_length=50, overlap_length=None, reads1=None, reads2=None, seq=None, ext_min_cov=5, ext_min_ratio=2, verbose=0, seed_length=None, seed_min_count=10, seed_max_count=100000000, threads=1, sequences_to_ignore=None, contigs_to_check=None):
+    def __init__(self, extend_length=50, overlap_length=None, reads1=None, reads2=None, seq=None, ext_min_cov=5, ext_min_ratio=2, verbose=0, seed_length=None, seed_min_count=10, seed_max_count=100000000, kmc_threads=1, map_threads=1, sequences_to_ignore=None, contigs_to_check=None):
         if contigs_to_check is None:
             contigs_to_check = {}
         if sequences_to_ignore is None:
             sequences_to_ignore = set()
         self.verbose = verbose
-        self.threads = threads
+        self.kmc_threads = kmc_threads
+        self.map_threads = map_threads
         self.extend_length = extend_length
         self.ext_min_cov = ext_min_cov
         self.ext_min_ratio = ext_min_ratio
@@ -23,7 +24,7 @@ class Seed:
         if seq is None:
             if reads1 is None:
                 raise Error('Cannot construct Seed object. Need reads when no seq has been given')
-            kmer_counts = kcount.get_most_common_kmers(reads1, reads2, most_common=1, min_count=seed_min_count, max_count=seed_max_count, kmer_length=seed_length, verbose=self.verbose, ignore_seqs=sequences_to_ignore, contigs_to_check=contigs_to_check)
+            kmer_counts = kcount.get_most_common_kmers(reads1, reads2, most_common=1, min_count=seed_min_count, max_count=seed_max_count, kmer_length=seed_length, verbose=self.verbose, ignore_seqs=sequences_to_ignore, contigs_to_check=contigs_to_check, kmc_threads=self.kmc_threads, map_threads=self.map_threads)
             if len(kmer_counts) == 1:
                 self.seq = list(kmer_counts.keys())[0]
                 if self.verbose:
diff --git a/iva/seed_processor.py b/iva/seed_processor.py
index 316a785..d62d6b3 100644
--- a/iva/seed_processor.py
+++ b/iva/seed_processor.py
@@ -9,7 +9,7 @@ import pyfastaq
 class Error (Exception): pass
 
 class SeedProcessor:
-    def __init__(self, seeds_fasta, reads1, reads2, outfile, index_k=15, index_s=3, threads=1, max_insert=500, minid=0.9, seed_stop_length=500, extend_length=50, overlap_length=None, ext_min_cov=5, ext_min_ratio=2, verbose=0, seed_length=None, seed_min_count=10, seed_max_count=100000000):
+    def __init__(self, seeds_fasta, reads1, reads2, outfile, index_k=15, index_s=3, threads=1, max_insert=500, minid=0.9, seed_stop_length=500, extend_length=50, overlap_length=None, ext_min_cov=5, ext_min_ratio=2, verbose=0, seed_length=None, seed_min_count=10, seed_max_count=100000000, kmc_threads=1):
         self.seeds_fasta = seeds_fasta
         self.reads1 = reads1
         self.reads2 = reads2
@@ -17,6 +17,7 @@ class SeedProcessor:
         self.index_k = index_k
         self.index_s = index_s
         self.threads = threads
+        self.kmc_threads = kmc_threads
         self.max_insert = max_insert
         self.minid = minid
         self.seed_stop_length = seed_stop_length
@@ -61,7 +62,9 @@ class SeedProcessor:
             verbose = self.verbose,
             seed_length = self.seed_length,
             seed_min_count = self.seed_min_count,
-            seed_max_count = self.seed_max_count
+            seed_max_count = self.seed_max_count,
+            kmc_threads = self.kmc_threads,
+            map_threads = self.threads
         )
         if len(new_seed) == 0:
             print('Warning: could not get most common kmer for', seed_name)
diff --git a/iva/test_data_runner.py b/iva/test_data_runner.py
new file mode 100644
index 0000000..6585899
--- /dev/null
+++ b/iva/test_data_runner.py
@@ -0,0 +1,50 @@
+import os
+import iva
+
+class Error (Exception): pass
+
+class Tester:
+    def __init__(self, outdir, iva_script, trimmo_jar=None, threads=1):
+        self.outdir = os.path.join(outdir)
+        if os.path.exists(self.outdir):
+            raise Error('Output directory alread exists. Cannot continue')
+
+        self.iva_script = iva_script
+        self.trimmo_jar = trimmo_jar
+        self.threads = threads
+
+
+    def _copy_input_files(self):
+        extractor = iva.egg_extract.Extractor(os.path.abspath(os.path.join(os.path.dirname(iva.__file__), os.pardir)))
+        test_files = os.path.join('iva', 'test_run_data')
+        extractor.copy_dir(test_files, self.outdir)
+        print('Copied input test files into here:', os.path.abspath(self.outdir))
+         
+
+    def _run_iva(self):
+        os.chdir(self.outdir)
+        cmd = self.iva_script + ' --threads ' + str(self.threads)
+        if self.trimmo_jar:
+            cmd += ' --trimmomatic ' + self.trimmo_jar
+
+        cmd += ' --pcr_primers hiv_pcr_primers.fa -f reads_1.fq.gz -r reads_2.fq.gz iva.out'
+
+        print('Current working directory:', os.getcwd())
+        print('Running iva on the test data with the command:', cmd, sep='\n')
+        iva.common.syscall(cmd)
+
+
+    def _check_output(self):
+        print('Finished running iva')
+        expected_contigs_file = os.path.abspath(os.path.join('iva.out', 'contigs.fasta'))
+        if os.path.exists(expected_contigs_file):
+            print('Looks OK. Final output contigs file is:', expected_contigs_file)
+        else:
+            print('Something went wrong! Final output contigs file not found:', expected_contigs_file)
+
+
+    def run(self):
+        self._copy_input_files()
+        self._run_iva()
+        self._check_output()
+
diff --git a/iva/test_run_data/hiv_pcr_primers.fa b/iva/test_run_data/hiv_pcr_primers.fa
new file mode 100644
index 0000000..90d2288
--- /dev/null
+++ b/iva/test_run_data/hiv_pcr_primers.fa
@@ -0,0 +1,36 @@
+>Pan-HIV-1_1F.1
+AGCCCGGGAGCTCTCTG
+>Pan-HIV-1_1F.2
+AGCCTGGGAGCTCTCTG
+>Pan-HIV-1_1R.1
+CCTCCAATTCCCCCTATCATTTT
+>Pan-HIV-1_1R.2
+CCTCCAATTCCTCCTATCATTTT
+>Pan-HIV-1_2F.1
+GGGAAGTGACATAGCAGGAAC
+>Pan-HIV-1_2F.2
+GGGAAGTGACATAGCTGGAAC
+>Pan-HIV-1_2F.3
+GGGAAGTGATATAGCAGGAAC
+>Pan-HIV-1_2F.4
+GGGAAGTGATATAGCTGGAAC
+>Pan-HIV-1_2R.1
+CTGCCATCTGTTTTCCATAATC
+>Pan-HIV-1_2R.2
+CTGCCATCTGTTTTCCATAGTC
+>Pan-HIV-1_3F
+TTAAAAGAAAAGGGGGGATTGGG
+>Pan-HIV-1_3R.1
+TGGCCTGTACCGTCAGCG
+>Pan-HIV-1_3R.2
+TGGCTTGTACCGTCAGCG
+>Pan-HIV-1_4F
+CCTATGGCAGGAAGAAGCG
+>Pan-HIV-1_4R.1
+CTTATATGCAGCATCTGAGGG
+>Pan-HIV-1_4R.2
+CTTATATGCAGCTTCTGAGGG
+>Pan-HIV-1_4R.3
+CTTTTATGCAGCATCTGAGGG
+>Pan-HIV-1_4R.4
+CTTTTATGCAGCTTCTGAGGG
diff --git a/iva/test_run_data/iva_contigs_no_trimmomatic.fasta b/iva/test_run_data/iva_contigs_no_trimmomatic.fasta
new file mode 100644
index 0000000..9ab1fa0
--- /dev/null
+++ b/iva/test_run_data/iva_contigs_no_trimmomatic.fasta
@@ -0,0 +1,167 @@
+>contig.00001
+GCTAGCAAGGGAACCCACTGCTTAAAGCCTCAATAAAGCTTGCCTTGAGTGCTTAAAGTG
+GTGTGTGCCCGTCTGTGTTAGGACTCTGGTAACTAGAGATCCCTCAGACCACTCTAGACT
+GAGTAAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGTTAATAGGGA
+CTCGAAAGCGAAAGTTCCAGAGAAGATCTCTCGACGCAGGACTCGGCTTGCTGAGGTGCA
+CACAGCAAGAGGCGAGAGCGGCGACTGGTGAGTACGCCAAATTTTGACTAGCAGAGGCTA
+GAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGGAGGAAAATTAGATGCATGGG
+AAAAAATTCGGCTACGGCCAGGGGGAAAGAAAAAGTATAGGCTGAAACATTTAGTATGGG
+CAAGCAGAGAGTTGGAAAGATTCGCAATTAACCCTGGCCTTTTAGAATCAGCAGAAGGAT
+GTCAACAAATAATAGAACAGTTACAGCCAACTCTCAAGACAGGATCAGAAGAACTTAAAT
+CTTTATATAATACAGTAGCAACCCTCTATTGTGTACATCAAAGGATAGGGGTAAAAGACA
+CCAAGGAAGCTCTAGATAAAATAGAGGAAATACAAAATAAGAGCCAGCAAAAGACACAGC
+AGGCAGCCGCTAGCACAGGAAGCAGCGGCAAAGTCAGTCAAAATTACCCTATAGTGCAAA
+ATGCACAAGGGCAAATGACACATCAGTCTTTATCACCCAGAACTTTGAATGCATGGGTGA
+AAGTAGTAGAAGAAAAGGGTTTTAACCCAGAAGTAATACCCATGTTCTCAGCATTATCAG
+AGGGAGCCACCCCACAAGATTTAAATATGATGCTAAATATAGTGGGGGGACACCAGGCAG
+CAATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGGATACACC
+CAGTACATGCAGGGCCTATTCCACCAGGCCAAATGAGGGAACCAAGGGGAAGTGACATAG
+CAGGAACTACTAGTACCCTTCAAGAACAAATAGGATGGATGACAAGCAATCCACCTATCC
+CAGTGGGAGACATCTATAAAAGATGGATAATCCTGGGATTGAATAAAATAGTAAGAATGT
+ATAGCCCTGTTAGCATTTTGGATATAAAACAAGGGCCAAAAGAGCCCTTCAGAGACTATG
+TGGATAGGTTCTATAGAACTCTCAGAGCGGAGCAAGCTACACAGGAGGTAAAAAATTGGA
+TGACAGAAACCTTACTAGTCCAAAATGCGAATCCAGACTGTAAGTCCATTTTAAAAGCAT
+TAGGAACAGGAGCTACATTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGGACCTA
+GCCACAAAGCAAGGGTTTTGGCTGAGGCAATGAGCCAAGCACAACATACAAATATAATGA
+TGCAGAGAGGCAATTTTAAGGGCCAGAAAAGAATTAAGTGTTTCAACTGTGGCAAAGAAG
+GACACCTAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGTTGTTGGAAATGTGGGAAGG
+AAGGACATCAAATGAAAGACTGCACTGAAAGACAGGCTAATTTTTTAGGGAGAATTTGGC
+CTTCCAACAGAGGAAGGCCAGGGAATTTTCCTCAGAGCAGAACAGAGCCAACAGCCCCAC
+CAGCAGAGAATTGGGGGATGGGGGAAGAGACAACCTCCTTACTGAAGCAGGAGCAGAAGG
+ACAAGGAACATCCTCCTCCCTCTCCTCCCTCAATTTCCCTCAAATCACTCTTTGGCAGCG
+ACCCCTTGTCACAGTAAAAATAGGGGGACAGCTGAAAGAAGCTCTATTAGATACAGGAGC
+AGATGATACAGTATTAGAAGATATAAATTTGCCAGGAAAATGGAAACCAAAAATGATAGG
+GGGAATTGGAGGTTTTATCAAGGTAAAGCAGTATGATCAGATATGTATAGAAATTTGTGG
+AAAAAAGGCTATAGGTACAGTACTAGTAGGACCTACACCTGTCAACATAATTGGACGAAA
+TATGTTGACTCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTATTGAAACTGTACC
+AGTAAAATTAAAGCCAGGAATGGATGGCCCAAAAGTTAAACAGTGGCCATTGACAGAAGA
+AAAAATAAAAGCATTAACAGAAATTTGTAATGATATGGAAAAGGAAGGAAAAATCTCAAA
+AATTGGGCCTGAAAACCCATATAATACTCCAATATTTGCTATAAAGAAAAAGGACAGCAC
+CAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAGAGAACTCAGGACTTTTGGGA
+AGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACAGTACT
+AGATGTGGGAGATGCATATTTTTCAGTACCTTTAGATGAAAACTTTAGAAAGTATACTGC
+ATTCACCATACCTAGTAGAAACAATGAGACACCAGGAATCAGATATCAGTACAATGTGCT
+GCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATCTTAGA
+TCCTTTTAGAAAACAAAATCCAGACATAGTTATTTATCAATACATGGATGATTTGTATGT
+AGGATCTGACTTAGAGATAGAGCAGCATAGAACAAAAATAGAGGAACTAAGACACCATCT
+GTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTCCTTTG
+GATGGGGTATGAACTCCATCCGGATAAGTGGACAGTACAGCCTATAAAGCTGCCAGAAAA
+AGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCAAGCCA
+GATTTATCCAGGGATTAAAGTAAAACAATTATGTAAACTCATTAGGGGGACCAAAACACT
+AACAGAAGTAGTACCACTAACAAAAGAAGCAGAGCTAGAACTGGCAGAAAACAGAGAGAT
+TCTAAAAGAACCAGTACATGGAACATATTATGACCCATCAAAAGACTTAATAGCAGAAAT
+ACAGAAGCAGGGAAATGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAAAATCT
+GAAAACAGGAAAGTATGCAAAAATAAGGGGAGCCCACACCAATGATGTAAAACTATTAAC
+AGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCTAGATT
+TAGATTACCCATACAAAGAGAAACATGGGACACATGGTGGACGGAATATTGGCAGGCTAC
+CTGGATTCCTGAATGGGAATTTGTTAATACCCCTCCTCTAGTAAAATTATGGTACCAATT
+AGAAAAAGACCCCATAGTAGGAGCAGAGACTTTCTATGTAGATGGGGCAGCTAGTAGGGA
+GACTAAGCTAGGAAAAGCAGGGTATGTCACTGACAGAGGAAGACAAAAGGTAGTTTCCCT
+AACTGAGACAACAAATCAGAAGACTGAATTGCATGCGATCCATTTAGCCTTGCAGGATTC
+AGGATCAGAAGTAAATATAGTAACAGACTCACAATATGCATTAGGAATCATTCAAGCACA
+ACCAGACATGAGTGAATCAGAAGTAGTCAACCAAATAATAGAGGAGCTAATAAAAAAGGA
+AAGAGTCTACCTGTCATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAAGTAGA
+TAAACTAGTCAGTTCAGGAATCAGGAAGGTGCTATTTTTAGATGGGATAGACAAAGCTCA
+AGAAGATCATGAAAGATATCACAGCAATTGGAGAACAATGGCTAGTGATTTTAATTTGCC
+ACCTATAGTAGCAAAGGAAATAGTAGCCAACTGTGATAAATGTCAACTGAAAGGGGAAGC
+TATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGCACACATCTAGA
+AGGAAAAGTCATCCTGGTAGCAGTCCACGTGGCCAGTGGATATATAGAAGCAGAAGTTAT
+CCCAGCAGAAACAGGACAGGAGACAGCATACTTTCTGCTAAAATTAGCAGGAAGATGGCC
+AGTAAAAGTAATACACACAGACAATGGTAGCAATTTCACCAGCACTGCAGTTAAAGCAGC
+CTGTTGGTGGGCCAATGTCCAACAGGAATTTGGGATTCCCTACAATCCCCAAAGTCAAGG
+AGTAGTAGAATCTATGAATAGGGAATTAAAGAAAATCATAGGGCAGGTAAGAGAGCAAGC
+TGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGAAAAGG
+GGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGACATACA
+AACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTACAGGGA
+CAGCAGAGACCCAATTTGGAAAGGACCAGCAAAGCTACTCTGGAAAGGTGAAGGGGCAGT
+AGTAATACAAGACAATAGTGACATAAAAGTAGTGCCAAGAAGAAAAGCAAAGATTATTAG
+GGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAAGATTA
+AAACATGGAACAGTCTAGTAAAATATCATATGTATAGATCAAAGAAAGCTAAAGAGTGGT
+TTTATAGACATCATTATGAAAGCCAGAATCCAAAGGTAAGTTCAGAAGTACATATCCCAC
+TAGGAGAGGCTAGATTAATAATAAGAACATATTGGGGTCTGCAGACAGGAGAAAGGGACT
+GGCATTTGGGTCATGGGGTCTCCATAGAATGGAGTCAGAGAAATTATAGCACACAAATAG
+ATCCTGACCTAGCAGACCAACTGATTCATCTACAATATTTTGACTGTTTTTCAGACTCTG
+CCATAAGGAAAGCCATATTAGGACAAGTAGTTAGTCATAGGTGTGAATATCCATCAGGAC
+ATAACAAGGTAGGATCCCTACAATATTTGGCACTGAAAGCATTAGCAACACCAAAAAAGA
+TAAGGCCACCTCTGCCTAGTGTTAAGAAATTAACAGAAGATAGATGGAACGAGCCCCAGA
+AGATCAGGGGCCACAGAGAGAACCCAACAATGAATGGACATTAGAACTATTAGAGGAGCT
+TAAAAATGAAGCTGTCAGACATTTCCCTAGGCTCTGGCTCCATGGCTTAGGACAGCACAT
+CTATGACACTTATGGGGATACTTGGGAAGGGGTTGAAGCTATAATAAGAACTTTGCAACA
+ACTACTGTTTGTTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATACCAGG
+GAGAAGAGGCAGGAATGGAGCCAGTAGATCCTAACCTAGAGCCCTGGAAGCATCCGGGGA
+GTCAGCCTACAACTGCTTGTACCAATTGCTACTGTAAAAAATGTTGCTGGCATTGCCAAC
+TATGCTTTCTGAAAAAAGGCTTAGGCATCTCCCATGGCAGGAAGAAGCGGAAACACCGAC
+GAGGAACTCCTCGCAGCAGTAAGGACCATCAAAATCCTATACCAGAGCAGTAAGTACTAA
+ATATATGTAATGCAAGCTTTAGCTATATTAGCAATAGTAGGATTAGTATTAGCAGGAATA
+ATAGCAATAGTTGTGTGGACTATAGTGTTCATAGAATATAGGAAAATAAGAAAACAAAAG
+AAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGAAATGAGAGT
+GACGGGGACACAGATGACTTGGCCAAGCTTTTGGAAATGGGGGACCTTGATCCTTGGGTT
+GGTGATAATTTGTAGTGCTGCAGAACAATTGTGGGTTACAGTTTATTATGGGGTTCCTGT
+GTGGAGAGATGCAGATACCACCCTATTTTGTGCATCAGATGCTAAAGGACATGAGACAGA
+AATGCACAATGTCTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAAT
+ACTCATGGAGAATGTAACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAGCAGAT
+GCATGAGGATGTAATCAGTTTATGGGATCAAAGTCTAAAACCATGTGTAAAGTTAACTCC
+TCTCTGTGTTACTTTAAATTGTACGAAGGTCAATATGGCCGGAGTCAATATTACTGACAC
+TGACAATATCACCAACATAATAGGAAATGGAACAGAGGAAGTAAGAAACTGTTCTTTTAA
+TATGACCACAGAACTAAATGATAAGCAGCGGAAGATTCATGCACTTTTTTATAAGCTTGA
+TATAGTATCAATGTATAATGATAATAGTAGTTATAGGTTAATAAATTGTAATACTTCAGT
+CATTAAGCAGGCTTGTCCAAAGGTATCCTTTGATCCAATTCCTATACATTATTGTACTCC
+AGCTGGTTATGCGATTTTAAAGTGTAATGATAAGAATTTCAATGGGACAGGGCCATGTAA
+AAATGTCAGCTCAGTACAATGCACACATGGAATTAAGCCAGTGGTATCAACTCAATTGCT
+GTTAAATGGAAGTCTAGCAGAAGAAGAGATAATAATCAGATCTGAAAATCTCACAGATAA
+TACAAAAACCATAATAGTGCACCTTAATACATCTGTACAAATTAATTGTACCAGACCCTC
+TAACAATACAAGAACAAGTGCAAGTATAGGACCAGGACAAGTATTATTCTATAGACCAGG
+AGAAGTAATAGGAAATATAAGAAAAGCATATTGTAATATTAGTGGAACAGCATGGAGGAA
+AGTCTTAGAACAGGTAACTGGAAAACTAAAAGAACACTTTAATAAGACAATAATCGTTGA
+ACCACACTCAGGAGGAGATCTAGAAATTACAACACATCACTTTAATTGTAGAGGGGAATT
+TTTTTATTGCAATACAACAAAACTGTTTACTAATAATTGCACAGATAACAGCACAGGGGG
+GTGTAATGATACTAATATCATAATTCCATGCAAGATAAGACAAATTGTACGCATGTGGCA
+AGGAGTAGGACAAGCAATGTACGCTCCTCCCATCAGTGGAGAAATTAAGTGTGTATCAAA
+TATTACAGGAATGCTATTGACAAGAGATGGTGGTAATACAACTAATGAGACCTTCAGACC
+TGGAGGAGGAAATATAAAGGACAATTGGAGAAGTGAATTATATAAATATAAAGTAGTACA
+AATTGATCCACTAGGAATAGCACCCACCAAGGCAAAAAGAAGAGTGGTGGACAGAGAAAA
+AAGAGCAGTGGGAATAGGAGCTATGATCTTTGGGTTCTTAGGAGCAGCAGGAAGCACTAT
+GGGCGCGGCGTCAATAACGCTGACGGTACAGGCCAGAGAATTATTGTCTGGTATAGTGCA
+ACAGCAAAGCAATTTGCTGAGGGCTATAGAGGCGCAGCAGCATCTGTTGCAACTCACAGT
+CTGGGGCATTAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTAAAGGATCA
+AAAGTTCCTAGGACTTTGGGGCTGCTCTGGAAAAATCATCTGTCCCACTGCTGTGCCCTG
+GAACACCTCTTGGAGTAATAAATCTTATGAAGAAATTTGGAACAACATGACATGGATAGA
+ATGGGAGAGAGAAATTAGCAATTACACAAGCCAAATATATGAGATACTTACAAAATCGCA
+GGACCAGCAGGATAGAAATGAAAAGGATTTGTTAGAATTGGACAATTGGGCAAGTCTGTG
+GACTTGGTTTGACATATCAAATTGGCTGTGGTATATAAGAATATTTATAATGATAGTAGG
+AGGTTTAATAGGTTTAAGAATAATTTTTGCTGTGCTTTCCATAGCGAATAGAGTTAGGCA
+GGGATACTCACCTCTGTCTTTCCAGACCCCTATCCAACTGCAGAGGGAACCCGACAGGCC
+CGAAGGAATCGAAGAAGGAGGTGGCGAGCAAGGCAGAGACAGATCCGTGAGATTAGTCAG
+CGGATTCTTGACTCTTGTCTGGGACGATCTACGGAGCCTGTTCCTCTTCCTCTACCACCG
+CTTGAGAGACTTCATCTTAATTGCAGCGAGGACTGTGGAACTTCTGGGACACAACAGTCT
+CAAGGGACTGAGACGGGGGTGGGAAGGCCTCAAATACCTGGGGAATCTTCTGTTGTATTG
+GGGCCAGGAACTAAAAACTAGTGCTATTTCTTTGTTTAATGCTACAGCAATAGCAGTAGG
+GGGGTGGACAGATAGACTTATAGAAGTAGCGCAAAGAGCTTGGAGAGCCCTTCTCCACAT
+ACCTAGAAGAATCAGACAGGGCTTAGAAAGGGCTTTGCTATAACATGGGAGGCAAGTGGT
+CAAAAAGTAGCATAGTGGGGTGGCCTCAGGTCAGGGAAAGATTAAGGAGAACAAACCCTC
+AAGCAACAGAAGGAGTAGGAGCAGTATCTCAAGATCTAGATAAACATGGAGCAGTAACAA
+GTACTAATATGAATAATGCAGATAGTGTCTGGCTGAGAGCACAAGAAGAAGATAACGAGG
+GGGTAGGCTTTCCAGTCAGGCCACAGGTACCTCTAAGACCAATGACTTTTAAGGGAGCAT
+TTGATCTTAGCTTCTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTACTCCAAGA
+AAAGACAAGAGATCATTGACTTATGGGTTTATAATACACAAGGCTACTTCCCTGATTGGC
+AAAACTACACACCAGGGCCAGGGGTCAGATACCCACTGTGTTTTGGATGGTGCTTCAAGT
+TAGTACCAGTTGACCCAAGTGAAGTAGAGGAGAACAACAAAGGAGAAAACAACTGCCTGC
+TACATCCCATGAGCCAGCATGGGCAAGAGGACGAGGAAAGAGAAGTGCTGATGTGGAAGT
+TTGACAGTGCCCTAGCACGAAAACACATAGCCCGAGAACAACATCCAGAGTACTATAAAG
+ACTGCTGACAAAGAAGTTTCTAACTAGGACTTCCGCTGGGGACTTTCCAGGGGAGGTGTG
+GCCGGGGCGGAGCTGGGGAGTGGTTAACCCTCAGAAGCTGCATAAAAGAGATCGGAAGAG
+CGGTTCAGCAGGAATGCCGAGACCGATCTCGGTCGTGTATCTCGTATGCCGTCTTCTGCT
+TGA
+>contig.00002
+GTGGAGAGATGCAGATACCACCCTATTTTGTGCATCAGATGCTAAAGGACATGAGACAGA
+AATGCACAATGTCTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAAGAAAT
+ACTCATGGAGAATGTAACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAGCAGAT
+GCATGAGGATGTAATCAGTTTATGGGATCAAAGTCTAAAACCATGTGTAAAGTTAACTCC
+TCTCTGTGTTACTTTACATTGTACCAATGTCACTAACATAGGAGGAGATGGAACAAAGGA
+AGTAAGAAACTGTTCTTTTAATATGACCACAGAACTAAAAGATAAGAAGCGGGAGATTCA
+TGCACTTTTTTATAGGCTTGATATAGTACCAGTTGATCCTAAGGCTAATAATAGTGAGTA
+TAGGTTAATAAATTGTAATACTTCAGTCATTAAGCAGGCTTGTCCAAAGGTATCCTTTGA
+TCCAATTCCTATACATTATTGCACTCCAGCTGGTTATGCGATTTTAAAGTGTAATGATAA
+GAATTTCAATGGGACAGGGCCATGTAAAAATGTCAGCTCAGTACAATGCACACATGGAAT
+TAAGCCAGTGGTATCAACTCAATTGCTGTTAAATGGAAGTCTAGCAGAAGAAGAGATAAT
+AATCA
diff --git a/iva/test_run_data/iva_contigs_with_trimmomatic.fasta b/iva/test_run_data/iva_contigs_with_trimmomatic.fasta
new file mode 100644
index 0000000..f3c3e05
--- /dev/null
+++ b/iva/test_run_data/iva_contigs_with_trimmomatic.fasta
@@ -0,0 +1,152 @@
+>contig.00001
+CTCTGGCTAGCAAGGGAACCCACTGCTTAAAGCCTCAATAAAGCTTGCCTTGAGTGCTTA
+AAGTGGTGTGTGCCCGTCTGTGTTAGGACTCTGGTAACTAGAGATCCCTCAGACCACTCT
+AGACTGAGTAAAAATCTCTAGCAGTGGCGCCCGAACAGGGACTTGAAAGCGAAAGTTAAT
+AGGGACTCGAAAGCGAAAGTTCCAGAGAAGATCTCTCGACGCAGGACTCGGCTTGCTGAG
+GTGCACACAGCAAGAGGCGAGAGCGGCGACTGGTGAGTACGCCAAATTTTGACTAGCAGA
+GGCTAGAAGGAGAGAGATGGGTGCGAGAGCGTCAGTATTAAGTGGAGGAAAATTAGATGC
+ATGGGAAAAAATTCGGCTACGGCCAGGGGGAAAGAAAAAGTATAGGCTGAAACATTTAGT
+ATGGGCAAGCAGAGAGTTGGAAAGATTCGCAATTAACCCTGGCCTTTTAGAATCAGCAGA
+AGGATGTCAACAAATAATAGAACAGTTACAGCCAACTCTCAAGACAGGATCAGAAGAACT
+TAAATCTTTATATAATACAGTAGCAACCCTCTGGTGCGTACACCAAAGGACAGATGTAAA
+AGACACCAAGGAAGCTTTAGATAAAATAGAGGAAGCACAAAACAGGAACCAGCAAAAGAC
+ACAGCAGGCAGCCGCTAGCACAGGAAGCAGCAGCAACGTCAGCCAAAATTACCCTATAGT
+GCAAAATGCACAAGGGCAAATGACACATCAGTCTTTATCACCCAGAACTTTGAATGCATG
+GGTGAAAGTAGTAGAAGAAAAGGGTTTTAACCCAGAAGTAATACCCATGTTCTCAGCATT
+ATCAGAGGGAGCCACCCCACAAGATTTAAATATGATGCTAAATATAGTGGGGGGACACCA
+GGCAGCAATGCAAATGTTAAAAGAAACCATCAATGAGGAAGCTGCAGAATGGGATAGGAT
+ACACCCAGTACATGCAGGGCCTATTCCACCAGGCCAAATGAGGGAACCAAGGGGAAGTGA
+CATAGCAGGAACTACTAGTACCCTTCAAGAACAAATAGGATGGATGACAAGCAATCCACC
+TATCCCAGTGGGAGACATCTATAAAAGATGGATAATCCTGGGATTGAATAAAATAGTAAG
+AATGTATAGCCCTGTTAGCATTTTGGATATAAAACAAGGGCCAAAAGAGCCCTTCAGAGA
+CTATGTGGATAGGTTCTATAGAACTCTCAGAGCGGAGCAAGCTACACAGGAGGTAAAAAA
+TTGGATGACAGAAACCTTACTAGTCCAAAATGCGAATCCAGACTGTAAGTCCATTTTAAA
+AGCATTAGGAACAGGAGCTACATTAGAAGAAATGATGACAGCATGCCAGGGAGTGGGAGG
+ACCTAGCCACAAAGCAAGGGTTTTGGCTGAGGCAATGAGCCAAGCACAACATACAAATAT
+AATGATGCAGAGAGGCAATTTTAAGGGCCAGAAAAGAATTAAGTGTTTCAACTGTGGCAA
+GGAAGGACACCTAGCCAGAAATTGCAGGGCCCCTAGAAAAAAGGGTTGTTGGAAATGTGG
+GAAGGAAGGACATCAAATGAAAGACTGCACTGAAAGACAGGCTAATTTTTTAGGGAGAAT
+TTGGCCTTCCAACAGAGGAAGGCCAGGGAATTTTCCTCAGAGCAGAACAGAGCCAACAGC
+CCCACCAGCAGAGAATTGGGGGATGGGGGAAGAGACAACCTCCTTACTGAAGCAGGAGCA
+GAAGGACAAGGAACATCCTCCTCCCTCTCCTCCCTCAATTTCCCTCAAATCACTCTTTGG
+CAGCGACCCCTTGTCACAGTAAAAATAGGGGGACAGCTGAAAGAAGCTCTATTAGATACA
+GGAGCAGATGATACAGTATTAGAAGAAATAAATTTGCCAGGAAAATGGAAACCAAAAATG
+ATAGGGGGAATTGGAGGTTTTATCAAGGTAAAGCAGTATGATCAGATATGTATAGAAATT
+TGTGGAAAAAAGGCTATAGGTACAGTACTAGTAGGACCTACACCTGTCAACATAATTGGA
+CGAAATATGTTGACTCAGATTGGTTGTACTTTAAATTTCCCAATTAGTCCTATTGAAACT
+GTACCAGTAAAATTAAAGCCAGGAATGGATGGCCCAAAGGTTAAACAGTGGCCATTAACA
+GAAGAAAAAATAAAAGCATTAACAGAAATTTGTAATGATATGGAAAAGGAAGGAAAAATC
+TCAAAAATTGGGCCTGAAAACCCATATAATACTCCAATATTTGCTATAAAGAAAAAGGAC
+AGCACCAAATGGAGAAAATTAGTAGATTTCAGAGAACTTAATAAAAGAACTCAGGACTTT
+TGGGAAGTTCAATTAGGAATACCACATCCTGCAGGGTTAAAAAAGAAAAAATCAGTAACA
+GTACTAGATGTGGGAGATGCATATTTTTCAGTACCTTTAGATGAAAACTTTAGAAAGTAT
+ACTGCATTCACCATACCTAGTAGAAACAATGAGACACCAGGAATCAGATATCAGTACAAT
+GTGCTGCCACAGGGATGGAAAGGATCACCAGCAATATTCCAAAGTAGCATGACAAAAATC
+TTAGATCCTTTTAGAAAACAAAATCCAGACATAGTTATTTATCAATACATGGATGATTTG
+TATGTAGGATCTGACTTAGAAATAGAGCAGCATAGAACAAAAATAGAGGAACTAAGACAC
+CATCTGTTGAAGTGGGGGTTTACCACACCAGACAAAAAACATCAGAAAGAACCTCCATTC
+CTTTGGATGGGGTATGAACTCCATCCGGATAAGTGGACAGTACAGCCTATAAAGCTTCCA
+GAAAAAGACAGCTGGACTGTCAATGACATACAGAAGTTAGTGGGAAAATTAAATTGGGCA
+AGCCAGATTTATCCAGGGATTAAAGTAAAACAATTATGTAAACTCATTAGGGGGACCAAA
+ACACTAACAGAAGTAGTACCACTAACAAAAGAAGCAGAGCTAGAACTGGCAGAAAACAGA
+GAGATTCTAAAAGAACCAGTACATGGAGTATATTATGACCCATCAAAAGACTTAATAGCA
+GAAATACAGAAGCAGGGAAATGGCCAATGGACATATCAAATTTATCAAGAGCCATTTAAA
+AATCTGAAAACAGGAAAGTATGCAAAAATAAGGGGAGCCCACACCAATGATGTAAAACTA
+TTAACAGAGGCAGTGCAAAAAATAGCCACAGAAAGCATAGTAATATGGGGAAAGACCCCT
+AGATTTAGATTACCCATACAAAGAGAAACATGGGACACATGGTGGACGGAATATTGGCAG
+GCTACCTGGATTCCTGAATGGGAATTTGTTAATACCCCTCCTCTAGTAAAATTATGGTAC
+CAATTAGAAAAAGACCCCATAGTAGGAGCAGAGACTTTCTATGTAGATGGGGCAGCTAGT
+AGGGAGACTAAGCTAGGAAAAGCAGGGTATGTCACTGACAGAGGAAGACAAAAGGTAGTT
+TCCCTAACTGAGACAACAAATCAGAAGACTGAATTGCATGCGATCCATTTAGCCTTGCAG
+GATTCAGGATCAGAAGTGAATATAGTAACAGACTCACAATATGCATTAGGAATCATTCAA
+GCACAACCAGACATGAGTGAATCAGAAGTAGTCAACCAAATAATAGAGGAGCTAATAAAA
+AAGGAAAGAGTCTACCTGTCATGGGTACCAGCACACAAAGGGATTGGAGGAAATGAACAA
+GTAGATAAACTAGTCAGTTCAGGAATCAGGAAGGTGCTATTTTTAGATGGGATAGACAAA
+GCTCAAGAAGATCATGAAAGATATCACAGCAATTGGAGAACAATGGCTAGTGATTTTAAT
+TTGCCACCTATAGTAGCAAAGGAAATAGTAGCCAACTGTGATAAATGTCAACTGAAAGGG
+GAAGCTATGCATGGACAAGTAGACTGTAGTCCAGGAATATGGCAATTAGATTGCACACAT
+CTAGAAGGAAAAGTCATCCTGGTAGCAGTCCACGTGGCCAGTGGATATATAGAAGCAGAA
+GTTATCCCAGCAGAAACAGGACAGGAGACAGCATACTTTCTGCTAAAATTAGCAGGAAGA
+TGGCCAGTAAAAGTAATACACACAGACAATGGTAGCAATTTCACCAGCACTGCAGTTAAA
+GCAGCCTGTTGGTGGGCCAATGTCCAACAGGAATTTGGGATTCCCTACAATCCCCAAAGT
+CAAGGAGTAGTAGAATCTATGAATAGGGAATTAAAGAAAATCATAGGGCAGGTAAGAGAG
+CAAGCTGAACACCTTAAGACAGCAGTACAAATGGCAGTATTCATCCACAATTTTAAAAGA
+AAAGGGGGGATTGGGGGGTACAGTGCAGGGGAAAGAATAATAGACATAATAGCAACAGAC
+ATACAAACTAAAGAATTACAAAAACAAATTACAAAAATTCAAAATTTTCGGGTTTATTAC
+AGGGACAGCAGAGACCCAATTTGGAAAGGACCAGCCAAACTACTCTGGAAAGGTGAAGGG
+GCAGTAGTAATACAAGACAATAGTGACATAAAAGTAGTACCAAGAAGAAAAGCAAAGATT
+ATTAGGGATTATGGAAAACAGATGGCAGGTGATGATTGTGTGGCAGGTAGACAGGATGAA
+GATTAAAACATGGAACAGTCTAGTAAAATATCATATGTATAGATCAAAGAAAGCTAAAGA
+GTGGTTTTATAGACATCATTATGAAAGCCAGAATCCAAAGGTAAGTTCAGAAGTACATAT
+CCCACTAGGAGAGGCTAGATTAATAATAAGAACATATTGGGGTCTGCAGACAGGAGAAAG
+GGACTGGCATTTGGGTCATGGGGTCTCCATAGAATGGAGTCAGAGAAATTATAGCACACA
+AATAGATCCTGACCTAGCAGACCAACTGATTCATCTACAATATTTTGACTGTTTTTCAGA
+CTCTGCCATAAGGAAAGCCATATTAGGACAAGTAGTTAGTCATAGGTGTGAATATCCATC
+AGGACATAACAAGGTAGGATCCCTACAATATTTGGCACTGAAAGCATTAGCAACACCAAA
+AAAGATAAGGCCACCTCTGCCTAGTGTTAAGAAATTAACAGAAGATAGATGGAACGAGCC
+CCAGAAGATCAGGGGCCACAGAGAGAACCCAACAATGAATGGACATTAGAACTATTAGAG
+GAGCTTAAAAATGAAGCTGTCAGACATTTCCCTAGGCTCTGGCTCCATGGCTTAGGACAG
+CACATCTATGACACTTATGGGGATACTTGGGAAGGGGTTGAAGCTATAATAAGAACTTTG
+CAACAACTACTGTTTGTTCATTTCAGAATTGGGTGTCAACATAGCAGAATAGGCATTATA
+CCAGGGAGAAGAGGCAGGAATGGAGCCAGTAGATCCTAACCTAGAGCCCTGGAAGCATCC
+GGGGAGTCAGCCTACAACTGCTTGTACCAATTGCTACTGTAAAAAATGTTGCTGGCATTG
+CCAACTATGCTTTCTGAAAAAAGGCTTAGGCATCTCCCATGGCAGGAAGAAGCGGAAACA
+CCGACGAGGAACTCCTCGCAGCAGTAAGGACCATCAAAATCCTATACCAGAGCAGTAAGT
+ACTAAATATATGTAATGCAAGCTTTAGCTATATTAGCAATAGTAGGATTAGTATTAGCAG
+GAATAATAGCAATAGTTGTGTGGACTATAGTGTTCATAGAATATAGGAAAATAAGAAAAC
+AAAAGAAAATAGACAGGTTAATTGATAGAATAAGAGAAAGAGCAGAAGACAGTGGAAATG
+AGAGCGACGGGGACACAGATGACTTGGCCAAGCTTTTGGAAATGGGGGACCTTGATCCTT
+GGGTTGGTGATAATTTGTAGTGCTGCAGAACAATTGTGGGTTACAGTTTATTATGGGGTT
+CCTGTGTGGAGAGATGCAGATACCACCCTATTTTGTGCATCAGATGCTAAAGGACATGAG
+ACAGAAATGCACAATGTCTGGGCCACACATGCCTGTGTACCCACAGACCCCAACCCACAA
+GAAATACTCATGGAGAATGTAACAGAAAATTTTAACATGTGGAAAAATAACATGGTAGAG
+CAGATGCATGAGGATGTAATCAGTTTATGGGATCAAAGTCTAAAACCATGTGTAAAGCTA
+ACTCCTCTCTGTGTTACTTTAAATTGTACGAAGGTCAATATGGCCGGAGTCAATATTACT
+GACACTGACAATATCACCAACATAATAGGAAATGGAACAGAGGAAGTAAGAAACTGTTCT
+TTTAATATGACCACAGAACTAAATGATAAGCAGCGGCAGATTCATGCACTTTTTTATAAG
+CTTGATATAGTATCAATGTATAATGATAATAGTAGTTATAGGTTAATAAATTGTAATACT
+TCAGTCATTAAGCAGGCTTGTCCAAAGGTATCCTTTGATCCAATTCCTATACATTATTGT
+ACTCCAGCTGGTTATGCGATTTTAAAGTGTAATGATAAGAATTTCAATGGGACAGGGCCA
+TGTAAAAATGTCAGCTCAGTACAATGCACACATGGAATTAAGCCAGTGGTATCAACTCAA
+TTGCTGTTAAATGGAAGTCTAGCAGAAGAAGAGATAATAATCAGATCTGAAAATCTCACA
+GATAATACAAAAACCATAATAGTGCACCTTAATACATCTGTACAAATTAATTGTACCAGA
+CCCTCTAACAATACAAGAACAAGTGCAAGTATAGGACCAGGACAAGTATTATTCTATAGA
+CCAGGAGAAGTAATAGGAAATATAAGAAAAGCATATTGTAATATTAGTGGAACAGCATGG
+AGGAAAGTTTTAAAACAGGTAACTGAAAAACTAAAAGAACACTTTAATAAAACAATACAC
+GTTGAACCACACTCAGGAGGAGATCTAGAAATTACAACACATCACTTTAATTGTAGAGGG
+GAATTTTTTTATTGCAATACAACAAAACTGTTTACTAATAATTGCACAGATAACAGCACA
+GGGGGGTGTAATGATACTAATATCATAATTCCATGCAAGATAAGACAAATTGTACGCATG
+TGGCAAGGAGTAGGACAAGCAATGTACGCTCCTCCCATCAGTGGAGAAATTAAGTGTGAA
+TCAAATATTACAGGAATACTATTGACAAGAGATGGTGGTCATAATTCAACTAATGAGACC
+TTCAGACCTGAAGGAGGAAATATAAAGGACAATTGGAGAAGTGAATTATATAAATATAAA
+GTAGTACAAATTGATCCACTAGGAATAGCACCCACCAGGGCAAAAAGAAGAGTGGTGGAC
+AGAGAAAAAAGAGCAGTGGGAATAGGAGCTATGATCTTTGGGTTCTTAGGAGCAGCAGGA
+AGCACTATGGGCGCGGCGTCAATAACGCTGACGGTACAGGCCAGAGAATTATTGTCTGGT
+ATAGTGCAACAGCAAAGCAATTTGCTGAGGGCTATAGAGGCGCAGCAGCATCTGTTGCAA
+CTCACAGTCTGGGGCATTAAACAGCTCCAGGCAAGAGTCCTGGCTGTGGAAAGATACCTA
+AAGGATCAAAAGTTCCTAGGACTTTGGGGCTGCTCTGGAAAAATCATCTGTCCCACTGCT
+GTGCCCTGGAACACCTCTTGGAGTAATAAATCTCATGACGAGATTTGGAACAACATGACA
+TGGATAGAATGGGAGAGAGAAATTAGCAATTACACAAGCCAAATATATGAGATACTTACA
+AAATCGCAGGACCAGCAGGATAGAAATGAAAAGGATTTGTTAGAATTGGACAATTGGGCA
+AGTCTGTGGACTTGGTTTGACATATCAAATTGGCTGTGGTATATAAGAATATTTATAATG
+ATAGTAGGAGGTTTAATAGGTTTAAGAATAATTTTTGCTGTGCTTTCCATAGCGAATAGA
+GTTAGGCAGGGATACTCACCTCTGTCTTTCCAGACCCCTATCCAACTGCAGAGGGAACCC
+GACAGGCCCGAAGGAATCGAAGAAGGAGGTGGCGAGCAAGGCAGAGACAGATCCGTGAGA
+TTAGTCAGCGGATTCTTGACTCTTGTCTGGGACGATCTACGGAGCCTGTTCCTCTTCCTC
+TACCACCGCTTGAGAGACTTCATCTTAATTGCAGCGAGGACTGTGGAACTTCTGGGACAC
+AACAGTCTCAAGGGACTGAGACGGGGGTGGGAAGGCCTCAAATACCTGGGGAATCTTCTG
+TTGTATTGGGGCCAGGAACTAAAAACTAGTGCTATTTCTTTGTTTAATGCTACAGCAATA
+GCAGTAGGGGGGTGGACAGATAGACTTATAGAAGTAGCGCAAAGAGCTTGGAGAGCCCTT
+CTCCACATACCTAGAAGAATCAGACAGGGCTTAGAAAGGGCTTTGCTATAACATGGGAGG
+CAAGTGGTCAAAAAGTAGCATAGTGGGGTGGCCTCAGGTCAGGGAAAGATTAAGGAGAAC
+AAACCCTCAAGCAACAGAAGGAGTAGGAGCAGTATCTCAAGATCTAGATAAACATGGAGC
+AGTAACAAGTACTAATATGAATAATGCTGATAGTGTCTGGCTGAGAGCACAAGAAGAAGA
+TAACGAGGGGGTAGGCTTTCCAGTCAGGCCACAGGTACCTCTAAGACCAATGACTTTTAA
+GGGAGCATTTGATCTTAGCTTCTTTTTAAAAGAAAAGGGGGGACTGGATGGGCTAATTTA
+CTCCCAGAAAAGACGAGAGATCCTTGACTTATGGGTTTATAATACACAAGGCTACTTCCC
+TGATTGGCAAAACTACACACCAGGGCCAGGGGTCAGATACCCACTGTGTTTTGGATGGTG
+CTTCAAGTTAGTACCAGTTGACCCAAGCGAAGTAGAGGAGAACAACAAAGGAGAAAACAA
+CTGCCTGCTACATCCCATGAGCCAGCATGGGCAAGAGGACGAGGAAAGAGAAGTGCTGAT
+GTGGAAGTTTGACAGTGCCCTAGCACGAAAACACATAGCCCGAGAACAACATCCAGAGTA
+CTATAAAGACTGCTGACAAAGAAGTTTCTAACTAGGACTTCCGCTGGGGACTTTCCAGGG
+GAGGTGTGGCCGGGGCGGAGCTGGGGAGTGGTTAA
diff --git a/iva/test_run_data/reads_1.fq.gz b/iva/test_run_data/reads_1.fq.gz
new file mode 100644
index 0000000..8f7a52a
Binary files /dev/null and b/iva/test_run_data/reads_1.fq.gz differ
diff --git a/iva/test_run_data/reads_2.fq.gz b/iva/test_run_data/reads_2.fq.gz
new file mode 100644
index 0000000..8e74d0c
Binary files /dev/null and b/iva/test_run_data/reads_2.fq.gz differ
diff --git a/iva/test_run_data/reference.fasta b/iva/test_run_data/reference.fasta
new file mode 100644
index 0000000..acda52b
--- /dev/null
+++ b/iva/test_run_data/reference.fasta
@@ -0,0 +1,152 @@
+>DQ234790
+cgaacagggacttgaaagcgaaagttaatagggactcgaaagcgaaagttccagagaagt
+tctctcgagcgcaggactcggcttgctgaggtgcacacagcaagaggcgagagcggcgac
+tggtgagtacgccaaattttgactagcggaggctagaaggagagagatgggtgcgagagc
+gtcaatattaacaggggaaaaattagatgcatgggaaaaaattcggttacggccaggggg
+aaagaaaaaatatatgataaaacatctagtatgggcaagcagagagttggaaagattcgc
+acttaaccctggccttttagaaacagcggaaggatgtcaacagataatagaacagttaca
+gtcaactctcaagacaggatcagaagaacttaaatcattatttaatacagtagcaaccct
+ctggtgcgtacaccaaaggatagaggtaaaagacaccaaggaagctttagataaattaga
+ggaaatacaaaataagaaccagaaaaagacacagcaggcagcagctggcacaggaagcaa
+cagcaaagtcagccaaaattaccctatagtgcaaaatgcacaaggacaaatgatacatca
+gtctttatcacctagaactttgaatgcatgggtgaaagtagtagaagaaaagggctttaa
+cccagaagtaatacccatgttctcagcattatcagagggagccgctccacaagatttaaa
+tatgatgctaaatatagtggggggacaccaggcagcaatgcaaatgttaaaagaaaccat
+caatgaggaagctgcagaatgggatagggtacacccagtacatgcagggcctattccacc
+aggccaaatgagggaaccaaggggaagtgacatagcaggaaccactagtacccttcaaga
+acaaataggatggatgacaagcaatccacytatcccagtgggagacatctataaaaggtg
+gataattctgggattaaataaaatagtaagaatgtatagccctgttagcattttggacat
+aagacaagggccaaaagaacccttcagagactatgtagataggttctataaaactctcag
+agcggaacaagctacacaggaagtaaaaaattggatgacagaaaccttgctagtccaaaa
+tgcgaatccagactgtaagtccattttaaaagcattaggagcaggagctactttagaaga
+aatgatgacagcatgccagggagtgggaggacctagccataaagcaagggttttggctga
+ggcaatgaaccaagcacaacagacaactgtaatgatgcagagaggcaattycaagggcca
+gaaaagaattaagtgcttcaactgtggcagggaaggacacctagccagaaattgcagggc
+ccctagaaaaaagggttgttggaaatgcgggaaggaaggacatcaaatgaaagactgcac
+tgagagacaggctaattttttagggaaaatttggcctcccaacaaggggaggccagggaa
+ttttcctcagagcagaccagagccttcagccccaccagcggaaaactggagggagataac
+ctccttactgaagcaggagcagaaggacaaggaacacccttctccttcaatctccctcaa
+atcactctttggcaacgaccccttgtcacagtaaaaataggaggacagctaaaagaagct
+ctattagatacaggagcagatgatacagtattagaagatataaatttgccaggaaaatgg
+aaaccaaaaatgatagggggaattggaggttttatcaaagtaagacaatatgatcagata
+cttatagaaatttgtggaaaaaaggctataggtacagtattagtaggacctacacctgtc
+aacataattgggcgaaatatgttgactcagattggctgtactttaaatttcccaatcagt
+cctattgacactgtaccagtaaaattaaagccaggaatggatggaccaaaggttaaacag
+tggccattgacagaagaaaaaataaaagcattaacagaaatttgtaaagagatggaagag
+gaaggaaagatctcaaaaattgggcctgaaaatccatacaatactccagtatttgctata
+aagaaaaaggacagcaccaaatggaggaaattagtagatttcagagagctcaataaaaga
+acccaggacttttgggaaattcaattaggaataccacacccagcaggtttaaaaaagaaa
+aaatcagtaacagtactagatgtgggagatgcatatttttcagttccattagataaayat
+tttagaaagtatacagcattcaccatacctagtataaacaatgagacaccaggaatcaga
+tatcagtacaatgtgctgccacagggatggaaaggatcaccggcaatattccagagtagc
+atgacaaaaatcttagaaccttttagagcaaacaatccagaaataattatctatcaatac
+atggatgacttgtatgtaggatctgacttagaaataggacagcatagaataaaaatagag
+gagctgagagctcatttattaagctggggatttactacaccagacaaaaagcatcagaag
+gaacctccattcctttggatggggtatgaactccatcctgacagatggacagtccagcct
+atagaactgccagaaaaagacagctggactgtcaatgatatacagaaattagtgggaaaa
+ctaaattgggcaagtcaaatttatgcagggattaagataaagcaattgtgtagactcctc
+aggggagctaaagcactaacagacgtagtaccactgactgaagaagcagaattagaattg
+gcagagaacagggagattctaaaaacccctgtgcatggagtatattatgacccatcaaaa
+gacttagtagcagaagtacagaagcaaggacaagaccaatggacatatcaaatttatcaa
+gagccatttaaaaatctaaaaacaggaaaatatgcaagaaaaaggtctgctcacactaat
+gatgtaagacaattagcagaagtggtgcaaaaaatagtcacagaaagcatagtaatatgg
+ggaaaggcccctaaatttaaactacccatacaaagagaaacatgggaaacatggtggatg
+gagtattggcaggctacctggattcctgaatgggagtttgtcaatacccctcctctagta
+aaattatggtaccaattagaaaaagaccccatagtgggagcagaaaccttctatgtagat
+ggggcagctagtagggaaactaagctaggaaaagcagggtatgtcactgacagaggaaga
+caaaaggtagtttccctaactgaaacaacaaatcaaaagactgagttacatgcaatctat
+ttagccttgcaagattcaggatcagaagtaaatatagtaacagactcacaatatgcatta
+ggaatcattcaggcacaaccagacaggagtgaatcagaaatagttagccaaataatagag
+gagctaataaaaaaggaaaaagtctacctgtcatgggtaccagcacataaagggattgga
+ggtaataacaaagtagataaattagtcagttcaggaatcaggaaagtgctatttttaaat
+gggatagataaggctcaagaagaacatgaaagatatcacagcaattggagaacaatggct
+agtgattttaatttgccacctatagtagcaaaggaaatagtagccaactgtgataaatgt
+caactaaaaggggaagctatgcatggacaagtagattgtagtccagggatatggcaatta
+gattgcacacatctagagggaaaagtcatcctggtagcagtccacgtggccagtggatat
+atagaagcagaagttatcccagcagaaacaggacaggagacagcatacttcctgctaaaa
+ttagcaggaagatggccagtaaaagtcatacacacagacaatggtagcaatttcaccagc
+gctgcagttaaagcagcctgttggtgggccaatgtccgacaggaatttgggatcccctac
+aatccccaaagtcaaggagtagtagaatctatgaataaggacttaaagaaaatcataggg
+caggtaagagaacaagctgaacatcttaagacagcagtacaaatggcagtattcattcac
+aattttaaaagaaaaggggggattggggggtacagtgcaggggaaagaataatagacata
+atagcaacagacatacaaactaaagaattacaaaaacaaattacaaaaattcaaaatttt
+cgggtctattacagggacagcagagacccaatttggaaaggaccagcaaaactactctgg
+aaaggtgaaggggcagtagtaatacaagacaatagtgatataaaagtagtgccaagaaga
+aaagcaaaaatcattagggattatggaaaacagatggcaggtgatgattgtgtggcaggt
+agacaggatgaggattagaacatggaacagtttagtaaaacatcatatgtatgtctcaaa
+gaaagctaaaaagtggtattatagacatcattatgaaagccagcatccaaagataagctc
+agaagtacacatcccactaggagaggctagattagtaataaaaacatattggggtctgca
+gacaggagaaaaggactggcaattgggtcatggagtctccatagaatggagacagagaaa
+ctatagcacacaaatagatcctgaagtagcagaccgactgattcatctacaatattttga
+ctgttttgcagactctgccataaggagagccatactaggacaagtagttagatataagtg
+tgaatatccatcaggacataacaaggtaggatctctacaatacttggcactaagggcatt
+aacagggccaaaagggagcaggccgcctctgcccagtgtaaagaaattaacagaagatag
+atggagcgagccccagaagaccaggggccacagagagaaccctacaatgaatggacatta
+gaactattagaggagcttaaaaatgaagctgytagacattttcctrggccctggctccat
+agcttaggacagtacatctatgatacttatggggatacttgggaaggggttgaagctata
+acaagaactttgcagcaactactgtttgttcatttcagaattgggtgtcaacatagcaga
+ataggcattataccagggagaagaggcaggaatggagccagtagatcctaacctagagcc
+ctggaatcatccgggaagtaagcctacaaccgcttgtaccaagtgttactgtaaaatatg
+ttcctggcattgccaattatgctttctgaaaaaaggcttaggcatctcctatggcaggaa
+gaagcggaagcaccgacgaggaactcctcggagcagtgagggccatcaaaatcctgtacc
+aaagcagtaagtatttgtaaaataagtaaatgtaatgacacctcttcaaattagtgcaat
+agtaggactgatagtagcgctaatcttagcaatagtagtgtggactatagtaggtttaga
+agttaggaaaatactaaggcaaagaaaaatagataggttaattaagaaaataagagaaag
+agaagaagacagtggaaatgagagtgaaggagacacagatgaattggccaaacttgtgga
+gatgggggactttgatccttgggttggtgataatttgtagtgcctcagacaacttgtggg
+ttacagtttattatggggtccctgtgtggaaagatgcagataccaccctattttgtgcat
+cagatgccaaagcacatgagacagaagtgcacaatgtctgggccacgcatgcctgtgtac
+ccacagaccccaacccacaagaaataaaactgggagatgtaacagaaaattttaacatgt
+ggraaaataaaatggcagagcagatgcaggaggatgtaatcagtttatgggatcaaagcc
+taaagccatgtgtaaagttaactcctctctgtgttactttaaactgtacccaggctaatt
+ggaaatctaataacacaacccagaatataaatagctyggtcacaataggaaatatgacag
+atgaagtaagaaattgttcttttaatatgaccacagaactaacagataagcagcagaagg
+tctatgcacttttttataagcttgatatagtagaaattaataatagtacgtataggttaa
+ttaattgtaatacttcagtcattaagcaagcttgtccaaaggtatcctttgatccaattc
+ctatacattattgtactccagctggttatgtgattttaaagtgcaatgataaaaaattca
+gtgggacagggccatgtaacaatgtaagctcagtacaatgcacacatggaattaagccag
+tggtgtcaactcaattgctattaaatggcagcctagcagaagaagagataataattagat
+ctgaaaatttcacaaataatgccaaaaccataatagtgcaccttaatgaatctgtacaaa
+tcacttgtaccagaccctccaacaatacaagagaaagtgtgcgtataggaccaggacaag
+tattctatagaacaggagaaataacaggagatataaggaaagcatattgtcagattaatg
+caacaaaatgggaaaaagttttaaaacaggtagctaaaaaattaagagagcaatttaata
+agacaaacataagatttcaaccacactcaggaggagatctagaaattacaatgcatcatt
+ttaattgtaaaggggaatttttctattgcaatacaacacaactgtttgatagtagttgga
+atacaacaacaaccaatagggagaaccgtagtaatttcatacttccatgcaggataaaac
+aaattataaacatgtggcaggaaacaggaaaagcaatgtatgctcctcccatcaggggaa
+gcattcagtgtgtatcaaatattacaggaatactattgacaagagatggtggtaataata
+atgggtctaacgagaccgagacctttagacctggaggaggagatataagagacaattgga
+gaagtgaattatataaatataaagtagtacaaattgaaccactaggagtagcacccacca
+gggcaaagagaagagtggtggagagagaaaaaagagcagtagtgggaataggagctatga
+tctttgggttcttaggagcagcaggaagcactatgggcgcggcgtcattaacgctgacgg
+tacaggccagacaattactgtctggtatagtgcaacagcaaagcaatttgctgagggcta
+tagaggcgcaacagcatatgttgcaactcacagtctggggcattaaacagctccaggcaa
+gagtcctggctgtggaaagatacctaaaggatcaaaggttcctaggactttggggctgct
+ctgggaagatcatctgcaccactgctgtgccctggaacaacacttggagtaataaatctt
+atgaagaaatttggaacaacatgacatggacacaatgggagagagaaattagcaattaca
+cagaccaaatatatgctatacttacagaatcgcaaaaccagcaggacaaaaatgagaagg
+atttgttggaattggaccaatgggcaagtctgtggaattggtttagcataacaaagtggc
+tgtggtatataaaaacatttataatgatagtaggaggtttaataggattaagaataatct
+ttgctgtgctttctatagtgaatagagttaggcagggatactcacccttgtctttccaga
+tccctctccaccagcagagggaaccagacagacccggaagaatcgaagaagaaggtggcg
+ggcaagacagagacagatccgtaagattagtgagcggattcttagctctgttgtgggacg
+atctacggaacctgtgcctcttcagctaccatcgcttgagagacttcatcttgattgtaa
+cgaggactgtggaacttctgggacacagcagtctcaagggactgagactggggtgggaag
+gcctcaaatatctggggaatcttctgttatattgggggcaggaactaaaaattagtgcta
+tttctttgcttaatactacagcaatagcagtagcagagtggacagatagggttatagaag
+tagcacaaagagcttggagggctatccttcacatacctagaagaatccgacagggcttag
+aaaggactttggtataacatgggaggcaaatggtcaaaaagtagcatagtgggatggcct
+caggtcagagaaagaataaggcaaactcccccagcaacagaaggagtaggagcagtatct
+caagatctagataaacatggagcagtaacaagcaataatatgaataatgatgatagtgtc
+tggctgagagcacaagaggaagatgaggaaggggtaggctttccagtcaggccacaggta
+cctctaagaccaatgacttataaggacgcttttgatcttagcttctttttaaaagaaaag
+gggggactggatgggctaatttactccaagaaaagacaagagatccttgacttatgggtt
+tataacacacaaggcttcttccctgattggcagaactacacaccagggccagggattaga
+tatccactgtgttttggatggtgcttcaaactagtaccagttgacccaagagaagtagag
+gaggacaacaaaggagaaaacaactgcctgttgcaccccgcaagccagcatggaatagat
+gacgaagaaagagaagtgctgatgtggaagtttgacagtgccctagcacgaaaacaccta
+gcccgagaactgcatccagagttctataaagactgctgacaaagaagtttctaactagga
+cttccgctggggactttccaggggaggtgtggccggggcggagttggggagtggctaacc
+ctcagatgctgcataaaagcagccgctttgcgcttgtactgggtctctcttggtagacca
+ggtcgagcccgggagctctctggctagcaagggaacccactgcttagagcctcaataaag
+cttgccttgagtgcttgaagtggtgtgtgcccgtctgtgttaggactctggtaact
diff --git a/iva/tests/kcount_test.py b/iva/tests/kcount_test.py
index 1e33668..65ae02a 100644
--- a/iva/tests/kcount_test.py
+++ b/iva/tests/kcount_test.py
@@ -42,6 +42,14 @@ class TestKcount(unittest.TestCase):
         os.unlink(counts_file)
 
 
+    def test_run_kmc_two_threads(self):
+        '''Test test_run_kmc with two threads'''
+        reads = os.path.join(data_dir, 'kcount_test.run_kmc.fa')
+        counts_file = kcount._run_kmc(reads, 'tmp.run_kmc', 10, 2, 4, threads=2)
+        self.assertTrue(filecmp.cmp(counts_file, os.path.join(data_dir, 'kcount_test.run_kmc.counts'), shallow=False))
+        os.unlink(counts_file)
+
+
     def test_kmc_to_kmer_counts(self):
         '''Test _kmc_to_kmer_counts'''
         counts = kcount._kmc_to_kmer_counts(os.path.join(data_dir, 'kcount_test.kmc_counts'), number=2)
diff --git a/scripts/iva b/scripts/iva
index af7eea2..1484839 100755
--- a/scripts/iva
+++ b/scripts/iva
@@ -63,10 +63,21 @@ trimming_group.add_argument('--pcr_primers', action=iva.common.abspathAction, he
 other_group = parser.add_argument_group('Other options')
 other_group.add_argument('-i', '--max_insert', type=int, help='Maximum insert size (includes read length). Reads with inferred insert size more than the maximum will not be used to extend contigs [%(default)s]', default=800, metavar='INT')
 other_group.add_argument('-t', '--threads', type=int, help='Number of threads to use [%(default)s]', default=1, metavar='INT')
+other_group.add_argument('--kmc_onethread', action='store_true', help='Force kmc to use one thread. By default the value of -t/--threads is used when running kmc')
 other_group.add_argument('--strand_bias', type=float, help='Set strand bias cutoff of mapped reads when trimming contig ends, in the interval [0,0.5]. A value of x means that a base needs min(fwd_depth, rev_depth) / total_depth <= x. The only time this should be used is with libraries with overlapping reads (ie fragment length < 2*read length), and even then, it can make results worse. If used, try a low value like 0.1 first [%(default)s]', default=0, metavar='FLOAT in [0,0.5]')
+other_group.add_argument('--test', action='store_true', help='Run using built in test data. All other options will be ignored, except the mandatory output directory, and --trimmomatic and --threads can be also be used')
 other_group.add_argument('--version', action='version', version=iva.common.version)
 
 options = parser.parse_args()
+
+if options.test:
+    print('Running iva in test mode...')
+    this_script = os.path.abspath(__file__)
+    tester = iva.test_data_runner.Tester(options.outdir, this_script, trimmo_jar=options.trimmomatic, threads=options.threads)
+    tester.run()
+    sys.exit()
+
+
 if options.seed_stop_length == 0:
     options.seed_stop_length = int(0.9 * options.max_insert)
 
@@ -92,6 +103,12 @@ if os.path.exists(options.outdir):
     sys.exit(1)
 
 
+if options.kmc_onethread:
+    kmc_threads = 1
+else:
+    kmc_threads = options.threads
+
+
 iva.external_progs.get_all_versions(iva.external_progs.assembly_progs)
 
 try:
@@ -188,6 +205,7 @@ elif options.reference:
         index_k = options.smalt_k,
         index_s = options.smalt_s,
         threads = options.threads,
+        kmc_threads = kmc_threads,
         max_insert = options.max_insert,
         minid = 0.9,
         seed_stop_length = options.seed_stop_length,
@@ -213,6 +231,7 @@ assembly = iva.assembly.Assembly(
     map_index_k = options.smalt_k,
     map_index_s = options.smalt_s,
     threads = options.threads,
+    kmc_threads = kmc_threads,
     map_minid = options.smalt_id,
     contig_iter_trim = options.ctg_iter_trim,
     ext_min_cov = options.ext_min_cov,
diff --git a/setup.py b/setup.py
index 828fb03..dec5c89 100644
--- a/setup.py
+++ b/setup.py
@@ -32,10 +32,10 @@ if not found_all_progs:
 
 setup(
     name='iva',
-    version='1.0.0',
+    version='1.0.4',
     description='Iterative Virus Assembler',
     packages = find_packages(),
-    package_data={'iva': ['gage/*', 'ratt/*', 'read_trim/*']},
+    package_data={'iva': ['gage/*', 'ratt/*', 'read_trim/*', 'test_run_data/*']},
     author='Martin Hunt',
     author_email='path-help at sanger.ac.uk',
     url='https://github.com/sanger-pathogens/iva',
@@ -43,9 +43,9 @@ setup(
     test_suite='nose.collector',
     tests_require=['nose >= 1.3'],
     install_requires=[
-        'pyfastaq >= 3.0.1',
+        'pyfastaq >= 3.10.0',
         'networkx >= 1.7',
-        'pysam >= 0.8.1'
+        'pysam >= 0.8.1, <= 0.8.3',
     ],
     license='GPLv3',
     classifiers=[

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/iva.git



More information about the debian-med-commit mailing list