[med-svn] [fastaq] 01/01: Removed all the source code

Jorge Soares jssoares-guest at moszumanska.debian.org
Wed Oct 8 10:54:01 UTC 2014


This is an automated email from the git hooks/post-receive script.

jssoares-guest pushed a commit to branch master
in repository fastaq.

commit 7b7443623efe3eccef7e2d8b613eb4273d68fe4e
Author: Jorge Soares <j.s.soares at gmail.com>
Date:   Wed Oct 8 11:58:24 2014 +0100

    Removed all the source code
---
 debian/changelog                                   |   4 +-
 debian/watch                                       |   4 +-
 fastaq/__init__.py                                 |   2 -
 fastaq/intervals.py                                | 117 -----
 fastaq/sequences.py                                | 549 --------------------
 fastaq/tasks.py                                    | 556 ---------------------
 fastaq/tests/data/sequences_test.embl              | 203 --------
 fastaq/tests/data/sequences_test.embl.bad          | 202 --------
 fastaq/tests/data/sequences_test.embl.bad2         | 202 --------
 fastaq/tests/data/sequences_test.embl.to_fasta     |  64 ---
 fastaq/tests/data/sequences_test.fa                |  19 -
 fastaq/tests/data/sequences_test.fa.ids            |   4 -
 fastaq/tests/data/sequences_test.fa.qual           |  17 -
 fastaq/tests/data/sequences_test.fa.qual.bad       |  17 -
 fastaq/tests/data/sequences_test.fasta_to_fastq.fq |  16 -
 fastaq/tests/data/sequences_test.gbk               | 170 -------
 fastaq/tests/data/sequences_test.gbk.to_fasta      |  10 -
 fastaq/tests/data/sequences_test.line_length3.fa   |  12 -
 fastaq/tests/data/sequences_test_3-per-line.fa     |  19 -
 .../tests/data/sequences_test_cap_to_read_pairs.fa |  16 -
 .../sequences_test_cap_to_read_pairs.fa.paired.gz  | Bin 92 -> 0 bytes
 ...sequences_test_cap_to_read_pairs.fa.unpaired.gz | Bin 92 -> 0 bytes
 .../tests/data/sequences_test_deinterleaved_1.fa   |   4 -
 .../tests/data/sequences_test_deinterleaved_2.fa   |   4 -
 .../data/sequences_test_deinterleaved_bad2_1.fa    |   2 -
 .../data/sequences_test_deinterleaved_bad2_2.fa    |   4 -
 .../data/sequences_test_deinterleaved_bad_1.fa     |   4 -
 .../data/sequences_test_deinterleaved_bad_2.fa     |   2 -
 fastaq/tests/data/sequences_test_empty_file        |   0
 .../tests/data/sequences_test_enumerate_names.fa   |   8 -
 ...quences_test_enumerate_names.fa.out.keep_suffix |   8 -
 .../sequences_test_enumerate_names.fa.out.start.1  |   8 -
 ...test_enumerate_names.fa.out.start.1.rename_file |   5 -
 .../sequences_test_enumerate_names.fa.out.start.2  |   8 -
 fastaq/tests/data/sequences_test_extend_gaps.fa    |   8 -
 .../tests/data/sequences_test_extend_gaps.fa.out   |   4 -
 fastaq/tests/data/sequences_test_fai_test.fa       |   8 -
 fastaq/tests/data/sequences_test_fai_test.fa.fai   |   4 -
 fastaq/tests/data/sequences_test_fail_no_AT.fq     |   5 -
 fastaq/tests/data/sequences_test_fail_no_plus.fq   |   4 -
 fastaq/tests/data/sequences_test_fail_no_qual.fq   |   3 -
 fastaq/tests/data/sequences_test_fail_no_seq.fq    |   5 -
 ...sequences_test_fastaq_replace_bases.expected.fa |   2 -
 .../data/sequences_test_fastaq_replace_bases.fa    |   2 -
 ...sequences_test_fastaq_to_quasr_primers.expected |   2 -
 .../data/sequences_test_fastaq_to_quasr_primers.fa |   4 -
 .../data/sequences_test_filter_by_ids_file.fa      |   8 -
 .../sequences_test_filter_by_ids_file.fa.filtered  |   4 -
 ...nces_test_filter_by_ids_file.fa.filtered.invert |   4 -
 .../data/sequences_test_filter_by_ids_file.fa.ids  |   2 -
 .../tests/data/sequences_test_filter_by_regex.fa   |  10 -
 .../sequences_test_filter_by_regex.first-char-a.fa |   6 -
 ...sequences_test_filter_by_regex.first-of-pair.fa |   4 -
 .../data/sequences_test_filter_by_regex.numeric.fa |   2 -
 .../data/sequences_test_get_seqs_flanking_gaps.fa  |   4 -
 .../sequences_test_get_seqs_flanking_gaps.fa.out   |   3 -
 fastaq/tests/data/sequences_test_gffv3.gff         |   9 -
 fastaq/tests/data/sequences_test_gffv3.gff.fasta   |   4 -
 .../tests/data/sequences_test_gffv3.gff.to_fasta   |   4 -
 .../data/sequences_test_gffv3.no_FASTA_line.gff    |   8 -
 ...sequences_test_gffv3.no_FASTA_line.gff.to_fasta |   4 -
 .../tests/data/sequences_test_gffv3.no_seq.2.gff   |   6 -
 fastaq/tests/data/sequences_test_gffv3.no_seq.gff  |   4 -
 fastaq/tests/data/sequences_test_good_file.fq      |  11 -
 .../data/sequences_test_good_file.fq.to_fasta      |   4 -
 .../tests/data/sequences_test_good_file_mira.xml   |  13 -
 fastaq/tests/data/sequences_test_interleaved.fa    |   8 -
 fastaq/tests/data/sequences_test_interleaved.fq    |  16 -
 .../tests/data/sequences_test_interleaved_bad.fa   |   6 -
 fastaq/tests/data/sequences_test_length_filter.fa  |   6 -
 .../sequences_test_length_filter.min-0.max-1.fa    |   0
 .../sequences_test_length_filter.min-0.max-inf.fa  |   6 -
 .../sequences_test_length_filter.min-4.max-4.fa    |   2 -
 .../sequences_test_make_random_contigs.default.fa  |   4 -
 .../sequences_test_make_random_contigs.first-42.fa |   4 -
 ...ces_test_make_random_contigs.name-by-letters.fa |  56 ---
 .../sequences_test_make_random_contigs.prefix-p.fa |   4 -
 fastaq/tests/data/sequences_test_not_a_fastaq_file |   1 -
 fastaq/tests/data/sequences_test_one-per-line.fa   |  14 -
 .../tests/data/sequences_test_phylip.interleaved   |   8 -
 .../sequences_test_phylip.interleaved.to_fasta     |   6 -
 .../tests/data/sequences_test_phylip.interleaved2  |   7 -
 .../sequences_test_phylip.interleaved2.to_fasta    |   6 -
 .../data/sequences_test_phylip.made_by_seaview     |   6 -
 .../sequences_test_phylip.made_by_seaview.to_fasta |   6 -
 fastaq/tests/data/sequences_test_phylip.sequential |   7 -
 .../data/sequences_test_phylip.sequential.to_fasta |   6 -
 fastaq/tests/data/sequences_test_revcomp.fa        |   8 -
 fastaq/tests/data/sequences_test_search_string.fa  |   2 -
 .../data/sequences_test_search_string.fa.hits      |   4 -
 .../tests/data/sequences_test_split_fixed_size.fa  |  12 -
 .../sequences_test_split_fixed_size.fa.split.1     |   2 -
 .../sequences_test_split_fixed_size.fa.split.2     |   2 -
 .../sequences_test_split_fixed_size.fa.split.3     |   2 -
 .../sequences_test_split_fixed_size.fa.split.4     |   2 -
 .../sequences_test_split_fixed_size.fa.split.5     |   4 -
 .../sequences_test_split_fixed_size.fa.split.6     |   2 -
 ...sequences_test_split_fixed_size.fa.split.coords |   2 -
 ...test_split_fixed_size.fa.split.skip_if_all_Ns.1 |   2 -
 ...test_split_fixed_size.fa.split.skip_if_all_Ns.2 |   2 -
 ...test_split_fixed_size.fa.split.skip_if_all_Ns.3 |   4 -
 ...test_split_fixed_size.fa.split.skip_if_all_Ns.4 |   2 -
 ...split_fixed_size.fa.split.skip_if_all_Ns.coords |   1 -
 fastaq/tests/data/sequences_test_split_test.fa     |   8 -
 fastaq/tests/data/sequences_test_split_test.fa.2.1 |   2 -
 fastaq/tests/data/sequences_test_split_test.fa.2.2 |   2 -
 fastaq/tests/data/sequences_test_split_test.fa.2.3 |   2 -
 fastaq/tests/data/sequences_test_split_test.fa.2.4 |   2 -
 fastaq/tests/data/sequences_test_split_test.fa.3.1 |   4 -
 fastaq/tests/data/sequences_test_split_test.fa.3.2 |   2 -
 fastaq/tests/data/sequences_test_split_test.fa.3.3 |   2 -
 fastaq/tests/data/sequences_test_split_test.fa.4.1 |   4 -
 fastaq/tests/data/sequences_test_split_test.fa.4.2 |   2 -
 fastaq/tests/data/sequences_test_split_test.fa.4.3 |   2 -
 fastaq/tests/data/sequences_test_split_test.fa.6.1 |   6 -
 fastaq/tests/data/sequences_test_split_test.fa.6.2 |   2 -
 .../data/sequences_test_split_test.fa.6.limit2.1   |   4 -
 .../data/sequences_test_split_test.fa.6.limit2.2   |   2 -
 .../data/sequences_test_split_test.fa.6.limit2.3   |   2 -
 .../tests/data/sequences_test_split_test.long.fa   |   4 -
 .../data/sequences_test_split_test.long.fa.2.1     |   2 -
 .../data/sequences_test_split_test.long.fa.2.2     |   2 -
 .../data/sequences_test_strip_after_whitespace.fa  |   6 -
 ...quences_test_strip_after_whitespace.fa.to_fasta |   6 -
 .../data/sequences_test_strip_illumina_suffix.fq   |  12 -
 ...equences_test_strip_illumina_suffix.fq.stripped |  12 -
 .../tests/data/sequences_test_to_unique_by_id.fa   |  11 -
 .../data/sequences_test_to_unique_by_id.fa.out     |   6 -
 fastaq/tests/data/sequences_test_translate.fa      |   2 -
 .../tests/data/sequences_test_translate.fa.frame0  |   3 -
 .../tests/data/sequences_test_translate.fa.frame1  |   3 -
 .../tests/data/sequences_test_translate.fa.frame2  |   3 -
 fastaq/tests/data/sequences_test_trim_Ns_at_end.fa |  10 -
 .../data/sequences_test_trim_Ns_at_end.fa.trimmed  |   8 -
 fastaq/tests/data/sequences_test_trimmed.fq        |   8 -
 fastaq/tests/data/sequences_test_untrimmed.fq      |  16 -
 fastaq/tests/data/utils_test_file_transpose.txt    |   5 -
 fastaq/tests/data/utils_test_file_transposed.txt   |   3 -
 fastaq/tests/data/utils_test_not_really_zipped.gz  |   1 -
 fastaq/tests/data/utils_test_scaffolds.fa          |   8 -
 .../data/utils_test_scaffolds.fa.to_contigs.fa     |  10 -
 ..._test_scaffolds.fa.to_contigs.number_contigs.fa |  10 -
 fastaq/tests/data/utils_test_system_call.txt       |   1 -
 fastaq/tests/intervals_test.py                     | 212 --------
 fastaq/tests/sequences_test.py                     | 535 --------------------
 fastaq/tests/tasks_test.py                         | 449 -----------------
 fastaq/tests/utils_test.py                         |  80 ---
 fastaq/utils.py                                    |  86 ----
 {scripts => src}/fastaq_capillary_to_pairs         |   0
 {scripts => src}/fastaq_chunker                    |   0
 {scripts => src}/fastaq_count_sequences            |   0
 {scripts => src}/fastaq_deinterleave               |   0
 {scripts => src}/fastaq_enumerate_names            |   0
 {scripts => src}/fastaq_extend_gaps                |   0
 {scripts => src}/fastaq_fasta_to_fastq             |   0
 {scripts => src}/fastaq_filter                     |   0
 {scripts => src}/fastaq_get_ids                    |   0
 {scripts => src}/fastaq_get_seq_flanking_gaps      |   0
 {scripts => src}/fastaq_insert_or_delete_bases     |   0
 {scripts => src}/fastaq_interleave                 |   0
 {scripts => src}/fastaq_make_random_contigs        |   0
 {scripts => src}/fastaq_replace_bases              |   0
 {scripts => src}/fastaq_reverse_complement         |   0
 {scripts => src}/fastaq_scaffolds_to_contigs       |   0
 {scripts => src}/fastaq_search_for_seq             |   0
 {scripts => src}/fastaq_split_by_base_count        |   0
 {scripts => src}/fastaq_strip_illumina_suffix      |   0
 {scripts => src}/fastaq_to_fasta                   |   0
 {scripts => src}/fastaq_to_mira_xml                |   0
 {scripts => src}/fastaq_to_perfect_reads           |   0
 {scripts => src}/fastaq_to_quasr_primers_file      |   0
 {scripts => src}/fastaq_to_random_subset           |   0
 {scripts => src}/fastaq_to_tiling_bam              |   0
 {scripts => src}/fastaq_to_unique_by_id            |   0
 {scripts => src}/fastaq_translate                  |   0
 {scripts => src}/fastaq_trim_Ns_at_end             |   0
 {scripts => src}/fastaq_trim_ends                  |   0
 177 files changed, 4 insertions(+), 4215 deletions(-)

diff --git a/debian/changelog b/debian/changelog
index a5388a4..f58bb6f 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,5 +1,5 @@
-Fastaq (0.1-1) UNRELEASED; urgency=low
+Fastaq (1.5.0-1) UNRELEASED; urgency=low
 
-  * Initial release (Closes: #<bug>)
+  * Initial release (Closes: #1234)
 
  -- DMPT <debian-med-packaging at lists.alioth.debian.org>  Thu, 24 May 2012 14:30:13 +0200
diff --git a/debian/watch b/debian/watch
index 5317993..dba8b5a 100644
--- a/debian/watch
+++ b/debian/watch
@@ -1,4 +1,4 @@
 version=3
 
-https://github.com/js21/Fastaq/tags \
-   /js21/Fastaq/archive/([.\d]+)\.tar\.gz
+https://github.com/sanger-pathogens/Fastaq/tags \
+   /sanger-pathogens/Fastaq/archive/([.\d]+)\.tar\.gz
diff --git a/fastaq/__init__.py b/fastaq/__init__.py
deleted file mode 100644
index 52ded75..0000000
--- a/fastaq/__init__.py
+++ /dev/null
@@ -1,2 +0,0 @@
-__all__ = ['utils', 'sequences', 'tasks', 'intervals']
-from fastaq import *
diff --git a/fastaq/intervals.py b/fastaq/intervals.py
deleted file mode 100644
index b320c63..0000000
--- a/fastaq/intervals.py
+++ /dev/null
@@ -1,117 +0,0 @@
-class Error (Exception): pass
-
-
-class Interval:
-    '''A class to deal with intervals in a genome. Can do things like intersections, unions etc'''
-    def __init__(self, start, end):
-        try:
-            self.start = int(start)
-            self.end = int(end)
-        except ValueError:
-            raise Error('Error making interval from :"' + str(start) + '" and "' + str(end) + '"')
-
-        if self.end < self.start:
-            raise Error('Error making interval ' + str(self) + '.  end < start.')
-
-    def __len__(self):
-        return self.end - self.start + 1
-
-    def __eq__(self, other):
-        return type(other) is type(self) and self.__dict__ == other.__dict__
-
-    def __ne__(self, other):
-        return not self.__eq__(other)
-
-    def __str__(self):
-        return '(' + str(self.start) + ',' + str(self.end) + ')'
-
-    def __lt__(self, i):
-        return self.start < i.start or (self.start == i.start and self.end < i.end)
-
-    def __le__(self, i):
-        return self.start < i.start or (self.start == i.start and self.end <= i.end)
-
-    def intersects(self, i):
-        '''Returns true iff this interval intersects the interval i'''
-        return self.start <= i.end and i.start <= self.end
-
-    def contains(self, i):
-        '''Returns true iff this interval contains the interval i'''
-        return self.start <= i.start and i.end <= self.end
-
-    def union(self, i):
-        '''If intervals intersect, returns their union, otherwise returns None'''
-        if self.intersects(i) or self.end + 1 == i.start or i.end + 1 == self.start:
-            return Interval(min(self.start, i.start), max(self.end, i.end))
-        else:
-            return None
-
-    def union_fill_gap(self, i):
-        '''Like union, but ignores whether the two intervals intersect or not'''
-        return Interval(min(self.start, i.start), max(self.end, i.end))
-
-    def intersection(self, i):
-        '''If intervals intersect, returns their intersection, otherwise returns None'''
-        if self.intersects(i):
-            return Interval(max(self.start, i.start), min(self.end, i.end))
-        else:
-            return None
-
-
-def intersection(l1, l2):
-    '''Returns intersection of two lists.  Assumes the lists are sorted by start positions'''
-    if len(l1) == 0 or len(l2) == 0:
-        return []
-
-    out = []
-    l2_pos = 0
-
-    for l in l1:
-        while l2_pos < len(l2) and l2[l2_pos].end < l.start:
-            l2_pos += 1
-
-        if l2_pos == len(l2):
-            break
-
-        while l2_pos < len(l2) and l.intersects(l2[l2_pos]):
-            out.append(l.intersection(l2[l2_pos]))
-            l2_pos += 1
-
-        l2_pos = max(0, l2_pos - 1)
-
-    return out
-
-
-def merge_overlapping_in_list(l):
-    '''Sorts list, merges any overlapping intervals, and also adjacent intervals. e.g.
-       [0,1], [1,2] would be merge to [0,.2].'''
-    i = 0
-    l.sort()
-
-    while i < len(l) - 1:
-        u = l[i].union(l[i+1])
-        if u is not None:
-            l[i] = u
-            l.pop(i+1)
-        else:
-            i += 1
-
-
-def remove_contained_in_list(l):
-    '''Sorts list in place, then removes any intervals that are completely
-       contained inside another interval'''
-    i = 0
-    l.sort()
-
-    while i < len(l) - 1:
-       if l[i+1].contains(l[i]):
-           l.pop(i)
-       elif l[i].contains(l[i+1]):
-           l.pop(i+1)
-       else:
-           i += 1
-
-
-def length_sum_from_list(l):
-    '''Returns total length of intervals from a list'''
-    return sum([len(x) for x in l])
diff --git a/fastaq/sequences.py b/fastaq/sequences.py
deleted file mode 100644
index 0ce03f8..0000000
--- a/fastaq/sequences.py
+++ /dev/null
@@ -1,549 +0,0 @@
-import re
-import string
-
-from fastaq import utils, intervals
-
-class Error (Exception): pass
-
-
-# python 3's seek is glacially slow. When we read a fasta file, we know
-# we've reached the end of a sequence when we get a new line starting with
-# '>'. Instead of using seek and tell, we just remember the previous line
-# of the file, for any given filehandle
-previous_lines = {}
-
-
-codon2aa = {
-'GCA': 'A',
-'GCC': 'A',
-'GCG': 'A',
-'GCT': 'A',
-'AGA': 'R',
-'AGG': 'R',
-'CGA': 'R',
-'CGC': 'R',
-'CGG': 'R',
-'CGT': 'R',
-'AAC': 'N',
-'AAT': 'N',
-'GAC': 'D',
-'GAT': 'D',
-'TGC': 'C',
-'TGT': 'C',
-'GAA': 'E',
-'GAG': 'E',
-'CAA': 'Q',
-'CAG': 'Q',
-'GGA': 'G',
-'GGC': 'G',
-'GGG': 'G',
-'GGT': 'G',
-'CAC': 'H',
-'CAT': 'H',
-'ATA': 'I',
-'ATC': 'I',
-'ATT': 'I',
-'TTA': 'L',
-'TTG': 'L',
-'CTA': 'L',
-'CTC': 'L',
-'CTG': 'L',
-'CTT': 'L',
-'AAA': 'K',
-'AAG': 'K',
-'ATG': 'M',
-'TTC': 'F',
-'TTT': 'F',
-'CCA': 'P',
-'CCC': 'P',
-'CCG': 'P',
-'CCT': 'P',
-'AGC': 'S',
-'AGT': 'S',
-'TCA': 'S',
-'TCC': 'S',
-'TCG': 'S',
-'TCT': 'S',
-'ACA': 'T',
-'ACC': 'T',
-'ACG': 'T',
-'ACT': 'T',
-'TGG': 'W',
-'TAC': 'Y',
-'TAT': 'Y',
-'GTA': 'V',
-'GTC': 'V',
-'GTG': 'V',
-'GTT': 'V',
-'TAA': '*',
-'TAG': '*',
-'TGA': '*'}
-
-def file_reader(fname, read_quals=False):
-    '''Iterates over a FASTA or FASTQ file, yielding the next sequence in the file until there are no more sequences'''
-    f = utils.open_file_read(fname)
-    line = f.readline()
-    phylip_regex = re.compile('^\s*[0-9]+\s+[0-9]+$')
-    gbk_regex = re.compile('^LOCUS\s+\S')
-
-    if line.startswith('>'):
-        seq = Fasta()
-        previous_lines[f] = line
-    elif line.startswith('##gff-version 3'):
-        seq = Fasta()
-        # if a GFF file, need to skip past all the annotation
-        # and get to the fasta sequences at the end of the file
-        while not line.startswith('>'):
-            line = f.readline()
-            if not line:
-                utils.close(f)
-                raise Error('No sequences found in GFF file "' + fname + '"')
-            
-        seq = Fasta()
-        previous_lines[f] = line
-    elif line.startswith('ID   ') and line[5] != ' ':
-        seq = Embl()
-        previous_lines[f] = line
-    elif gbk_regex.search(line):
-        seq = Embl()
-        previous_lines[f] = line
-    elif line.startswith('@'):
-        seq = Fastq()
-        previous_lines[f] = line
-    elif phylip_regex.search(line):
-        # phylip format could be interleaved or not, need to look at next
-        # couple of lines to figure that out. Don't expect these files to
-        # be too huge, so just store all the sequences in memory
-        number_of_seqs, bases_per_seq = line.strip().split()
-        number_of_seqs = int(number_of_seqs)
-        bases_per_seq = int(bases_per_seq)
-        got_blank_line = False
-
-        first_line = line
-        seq_lines = []
-        while 1:
-            line = f.readline()
-            if line == '':
-                break
-            elif line == '\n':
-                got_blank_line = True
-            else:
-                seq_lines.append(line.rstrip())
-        utils.close(f)
-
-        if len(seq_lines) == 1 or len(seq_lines) == number_of_seqs:
-            sequential = True
-        elif seq_lines[0][10] != ' ' and seq_lines[1][10] == ' ':
-            sequential = True
-        else:
-            sequential = False
-            
-        # if the 11th char of second sequence line is a space,  then the file is sequential, e.g.:
-        # GAGCCCGGGC AATACAGGGT AT
-        # as opposed to:
-        # Salmo gairAAGCCTTGGC AGTGCAGGGT
-        if sequential:
-            current_id = None
-            current_seq = ''
-            for line in seq_lines:
-                if len(current_seq) == bases_per_seq or len(current_seq) == 0:
-                    if current_id is not None:
-                        yield Fasta(current_id, current_seq.replace('-', ''))
-                    current_seq = ''
-                    current_id, new_bases = line[0:10].rstrip(), line.rstrip()[10:]
-                else:
-                    new_bases = line.rstrip()
-                       
-                current_seq += new_bases.replace(' ','')
-            
-            yield Fasta(current_id, current_seq.replace('-', ''))
-        else:
-            # seaview files start all seqs at pos >=12. Other files start
-            # their sequence at the start of the line
-            if seq_lines[number_of_seqs + 1][0] == ' ':
-                first_gap_pos = seq_lines[0].find(' ')
-                end_of_gap = first_gap_pos
-                while seq_lines[0][end_of_gap] == ' ':
-                    end_of_gap += 1
-                first_seq_base = end_of_gap
-            else:
-                first_seq_base = 10
-
-            seqs = []
-            for i in range(number_of_seqs):
-                name, bases = seq_lines[i][0:first_seq_base].rstrip(), seq_lines[i][first_seq_base:]
-                seqs.append(Fasta(name, bases))
-            
-            for i in range(number_of_seqs, len(seq_lines)):
-                seqs[i%number_of_seqs].seq += seq_lines[i]
-
-            for fa in seqs:
-                fa.seq = fa.seq.replace(' ','').replace('-','')
-                yield fa
-                
-        return
-    elif line == '':
-        utils.close(f)
-        return
-    else:
-        utils.close(f)
-        raise Error('Error determining file type from file "' + fname + '". First line is:\n' + line.rstrip())
-
-    try:
-        while seq.get_next_from_file(f, read_quals):
-            yield seq
-    finally:
-        utils.close(f)
-
-
-class Fasta:
-    '''Class to store and manipulate FASTA sequences. They have two things: a name and a sequence'''
-    # this defines the line length when printing sequences
-    line_length = 60
-
-    def _get_id_from_header_line(self, line):
-        if line.startswith('>'):
-            return line.rstrip()[1:]
-        else:
-            raise Error('Error! expected line starting with ">", but got this:\n', line)
-
-
-    def __init__(self, id_in=None, seq_in=None):
-        self.id = id_in
-        self.seq = seq_in
-
-    def __eq__(self, other):
-        return type(other) is type(self) and self.__dict__ == other.__dict__
-
-    def __ne__(self, other):
-        return not self.__eq__(other)
-
-    def __len__(self):
-        return len(self.seq)
-
-    def split_capillary_id(self):
-        '''Gets the prefix and suffix of an name of a capillary read, e.g. xxxxx.p1k or xxxx.q1k. Returns a tuple (prefix, suffx)'''
-        try:
-            a = self.id.rsplit('.', 1)
-            if a[1].startswith('p'):
-                dir = 'fwd'
-            elif a[1].startswith('q'):
-                dir = 'rev'
-            else:
-                dir = 'unk'
-
-            return {'prefix': a[0], 'dir': dir, 'suffix':a[1]}
-        except:
-            raise Error('Error in split_capillary_id() on ID', self.id)
-
-    def strip_after_first_whitespace(self):
-        '''Removes everything in the name after the first whitespace character'''
-        self.id = self.id.split()[0]
-
-    def strip_illumina_suffix(self):
-        '''Removes any trailing /1 or /2 off the end of the name'''
-        if self.id.endswith('/1') or self.id.endswith('/2'):
-            self.id = self.id[:-2]
-
-    def revcomp(self):
-        '''Reverse complements the sequence'''
-        self.seq = self.seq.translate(str.maketrans("ATCGatcg", "TAGCtagc"))[::-1]
-
-    def is_all_Ns(self, start=0, end=None):
-        '''Returns true if the sequence is all Ns (upper or lower case)'''
-        if end is not None:
-            if start > end:
-                raise Error('Error in is_all_Ns. Start coord must be <= end coord')
-            end += 1
-        else:
-            end = len(self)
-
-        if len(self) == 0:
-            return False
-        else:
-            return re.search('[^Nn]', self.seq[start:end]) is None
-
-    def trim_Ns(self):
-        '''Removes any leading or trailing N or n characters from the sequence'''
-        self.seq = self.seq.strip('Nn')
-
-    def replace_bases(self, old, new):
-        '''Replaces all occurences of 'old' with 'new' '''
-        self.seq = self.seq.replace(old, new)
-
-    def replace_interval(self, start, end, new):
-        '''Replaces the sequence from start to end with the sequence "new"'''
-        if start > end or start > len(self) - 1 or end > len(self) - 1:
-            raise Error('Error replacing bases ' + str(start) + '-' + str(end) + ' in sequence ' + self.id)
-
-        self.seq = self.seq[0:start] + new + self.seq[end + 1:]
-
-    def gaps(self, min_length = 1):
-        '''Finds the positions of all gaps in the sequence that are at least min_length long. Returns a list of Intervals. Coords are zero-based'''
-        gaps = []
-        regex = re.compile('N+', re.IGNORECASE)
-        for m in regex.finditer(self.seq):
-             if m.span()[1] - m.span()[0] + 1 >= min_length:
-                 gaps.append(intervals.Interval(m.span()[0], m.span()[1] - 1))
-        return gaps
-
-    def contig_coords(self):
-        '''Finds coords of contigs, i.e. everything that's not a gap (N or n). Returns a list of Intervals. Coords are zero-based'''
-        # contigs are the opposite of gaps, so work out the coords from the gap coords
-        gaps = self.gaps()
-
-        if len(gaps) == 0:
-            return [intervals.Interval(0, len(self) - 1)]
-
-        coords = [0]
-        for g in gaps:
-            if g.start == 0:
-                coords = [g.end + 1]
-            else:
-                coords += [g.start - 1, g.end + 1]
-
-        if coords[-1] < len(self):
-            coords.append(len(self) - 1)
-
-        return [intervals.Interval(coords[i], coords[i+1]) for i in range(0, len(coords)-1,2)]
-
-
-
-    # Fills the object with the next sequence in the file. Returns
-    # True if this was successful, False if no more sequences in the file.
-    # If reading a file of quality scores, set read_quals = True
-    def get_next_from_file(self, f, read_quals=False):
-        if f in previous_lines:
-            if previous_lines[f] == None:
-                self.id = self.seq = None
-                return False
-            else:
-                self.id = self._get_id_from_header_line(previous_lines[f])
-        else:
-            line = '\n'
-            while line == '\n':
-                line = f.readline()
-            self.id = self._get_id_from_header_line(line)
-
-        self.seq = ''
-        seq_lines = [] # much faster to store the seq lines in an array,
-                       # then join at the end
-
-        while 1:
-            line = f.readline()
-
-            if line.startswith('>'):
-                previous_lines[f] = line.rstrip()
-                break
-            elif line == '':
-                previous_lines[f] = None
-                break
-            else:
-                 seq_lines.append(line.rstrip())
-
-        if read_quals:
-            self.seq = ' '.join(seq_lines)
-        else:
-            self.seq = ''.join(seq_lines)
-        return True
-
-    def __str__(self):
-        if Fasta.line_length == 0:
-            return '>' + self.id + '\n' + self.seq
-        else:
-            return '>' + self.id + '\n' + '\n'.join(self.seq[i:i+Fasta.line_length] for i in range(0, len(self), Fasta.line_length))
-
-    def __getitem__(self, index):
-        return self.seq[index]
-
-    def trim(self, start, end):
-        '''Removes first 'start'/'end' bases off the start/end of the sequence'''
-        self.seq = self.seq[start:len(self.seq) - end]
-
-    # qual_scores should be a list of quality scores
-    def to_Fastq(self, qual_scores):
-        '''Returns a Fastq object. qual_scores expected to be a list of numbers, like you would get in a .qual file'''
-        if len(self) != len(qual_scores):
-            raise Error('Error making Fastq from Fasta, lengths differ.', self.id)
-        return Fastq(self.id, self.seq, ''.join([chr(max(0, min(x, 93)) + 33) for x in qual_scores]))
-
-    def search(self, search_string):
-        '''Finds every occurence (including overlapping ones) of the search_string, including on the reverse strand. Returns a list where each element is a tuple (position, strand) where strand is in ['-', '+']. Positions are zero-based'''
-        seq = self.seq.upper()
-        search_string = search_string.upper()
-        pos = 0
-        found = seq.find(search_string, pos)
-        hits = []
-
-        while found != -1:
-            hits.append((found, '+'))
-            pos = found + 1
-            found = seq.find(search_string, pos)
-
-
-        pos = 0
-        search_string = Fasta('x', search_string)
-        search_string.revcomp()
-        search_string = search_string.seq
-        found = seq.find(search_string, pos)
-
-        while found != -1:
-            hits.append((found, '-'))
-            pos = found + 1
-            found = seq.find(search_string, pos)
-
-        return hits
-
-    def translate(self, frame=0):
-        '''Returns a Fasta sequence, translated into amino acids. Starts translating from 'frame', where frame expected to be 0,1 or 2'''
-        return Fasta(self.id, ''.join([codon2aa.get(self.seq[x:x+3].upper(), 'X') for x in range(frame, len(self)-1-frame, 3)]))
-
-
-class Embl(Fasta):
-    '''Exactly the same as Fasta, but reading seqs from a file works differently'''
-    def __eq__(self, other):
-        return type(other) in [Fasta, Embl] and  type(self) in [Fasta, Embl] and self.__dict__ == other.__dict__
-
-    def _get_id_from_header_line(self, line):
-        if line.startswith('ID   ') and line[5] != ' ':
-            return line.split()[1].rstrip(';')
-        elif line.startswith('LOCUS'):
-            return line.split()[1]
-        else:
-            raise Error('Error! expected line starting with "ID" or "LOCUS", but got this:\n', line)
-
-    def get_next_from_file(self, f, read_quals=False):
-        if f in previous_lines:
-            line = ''
-            if previous_lines[f] == None:
-                self.id = self.seq = None
-                return False
-            else:
-                self.id = self._get_id_from_header_line(previous_lines[f])
-        else:
-            line = '\n'
-            while line == '\n':
-                line = f.readline()
-            self.id = self._get_id_from_header_line(line)
-
-        self.seq = ''
-        seq_lines = []
- 
-        while not (line.startswith('SQ') or line.rstrip() == 'ORIGIN'):
-            line = f.readline()
-            if line == '':
-                raise Error('Error! No SQ or ORIGIN line found for sequence ' + self.id)
-        
-        line = f.readline()
-
-        while not line.startswith('//'):
-            if line == '' or line[0] != ' ':
-                raise Error('Error! Did not find end of sequence ' + self.id)
-            seq_lines.append(''.join(line.rstrip().strip(' 0123456789').split()))
-            line = f.readline()
-            
-
-        while 1:
-            if line.startswith('ID') or line.startswith('LOCUS'):
-                previous_lines[f] = line.rstrip()
-                break
-            elif line == '':
-                previous_lines[f] = None
-                break
-
-            line = f.readline()
-
-        self.seq = ''.join(seq_lines)
-        return True
-
-class Fastq(Fasta):
-    '''Class to store and manipulate FASTQ sequences. They have three things: a name, sequence and string of quality scores'''
-    def __init__(self, id_in=None, seq_in=None, qual_in=None):
-        super().__init__(id_in, seq_in)
-        self.qual = qual_in
-        if (not self.seq == self.qual == None) and len(self.qual) != len(self.seq):
-            raise Error('Error constructing Fastq. Mismatch in sequence and quality length\n' + str(self))
-
-    def __str__(self):
-        return '@' + self.id + '\n' + self.seq + '\n+\n' + self.qual
-
-    def __eq__(self, other):
-        return type(other) is type(self) and self.__dict__ == other.__dict__
-
-    def get_next_from_file(self, f, read_quals=False):
-        if f in previous_lines:
-            line = previous_lines[f]
-            del previous_lines[f]
-        else:
-            line = f.readline()
-
-        while line == '\n':
-            line = f.readline()
-
-        if not line:
-            self = Fastq('', '', '')
-            return False
-
-        if not line.startswith('@'):
-            raise Error('Error getting next sequence from fastq file. Got line:\n' + line)
-
-        self.id = line.rstrip()[1:]
-        line = f.readline()
-        if not line:
-            raise Error('Error getting next sequence from fastq file, sequence has ID ' + self.id)
-
-        self.seq = line.strip()
-
-        line = f.readline()
-        if not (line and line.startswith('+')):
-            raise Error('Error getting next sequence from fastq file, no line starting with +,  sequence has ID ' + self.id)
-
-        line = f.readline()
-        if not line:
-            raise Error('Error getting next sequence from fastq file, sequence has ID ' + self.id)
-
-        self.qual = line.rstrip()
-        return True
-
-    def revcomp(self):
-        '''Reverse complements the sequence'''
-        super().revcomp()
-        self.qual = self.qual[::-1]
-
-    def trim(self, start, end):
-        '''Removes first 'start'/'end' bases off the start/end of the sequence'''
-        super().trim(start, end)
-        self.qual = self.qual[start:len(self.qual) - end]
-
-    def to_Fasta_and_qual(self):
-        quals = [ord(x) - 33 for x in self.qual]
-        return (Fasta(self.id, self.seq), quals)
-
-
-    def trim_Ns(self):
-        '''Removes any leading or trailing N or n characters from the sequence'''
-        # get index of first base that is not an N
-        i = 0
-        while i < len(self) and self.seq[i] in 'nN':
-            i += 1
-
-        # strip off start of sequence and quality
-        self.seq = self.seq[i:]
-        self.qual = self.qual[i:]
-
-        # strip the ends
-        self.seq = self.seq.rstrip('Nn')
-        self.qual = self.qual[:len(self.seq)]
-
-    def replace_interval(self, start, end, new, qual_string):
-        '''Replaces the sequence from start to end with the sequence "new"'''
-        if len(new) != len(qual_string):
-            raise Error('Length of new seq and qual string in replace_interval() must be equal. Cannot continue')
-        super().replace_interval(start, end, new)
-        self.qual = self.qual[0:start] + qual_string + self.qual[end + 1:]
-
-    def translate(self):
-        '''Returns a Fasta sequence, translated into amino acids. Starts translating from 'frame', where frame expected to be 0,1 or 2'''
-        fa = super().translate()
-        return Fastq(fa.id, fa.seq, 'I'*len(fa.seq))
-
diff --git a/fastaq/tasks.py b/fastaq/tasks.py
deleted file mode 100644
index ad10b06..0000000
--- a/fastaq/tasks.py
+++ /dev/null
@@ -1,556 +0,0 @@
-import re
-import copy
-import random
-from fastaq import sequences, utils
-
-class Error (Exception): pass
-
-def capillary_to_pairs(infile, outprefix):
-    # hash the sequences, only taking longest where an end has been sequenced more than once
-    seq_reader = sequences.file_reader(infile)
-    fwd_seqs = {}
-    rev_seqs = {}
-    unpaired_seqs = {}
-
-    for seq in seq_reader:
-        id_info = seq.split_capillary_id()
-        if id_info['dir'] == 'fwd':
-            seq.id = id_info['prefix'] + '/1'
-            h = fwd_seqs
-        elif id_info['dir'] == 'rev':
-            seq.id = id_info['prefix'] + '/2'
-            h = rev_seqs
-        else:
-            seq.id = id_info['prefix']
-            h = unpaired_seqs
-
-        key = id_info['prefix']
-
-        if key not in h or len(h[key]) < len(seq):
-            h[key] = copy.copy(seq)
-
-    # write the output files
-    f_pe = utils.open_file_write(outprefix + '.paired.gz')
-    f_up = utils.open_file_write(outprefix + '.unpaired.gz')
-
-    for id in fwd_seqs:
-        if id in rev_seqs:
-            print(fwd_seqs[id], file=f_pe)
-            print(rev_seqs[id], file=f_pe)
-            del rev_seqs[id]
-        else:
-            print(fwd_seqs[id], file=f_up)
-
-    for seq in rev_seqs.values():
-        print(seq, file=f_up)
-
-    for seq in unpaired_seqs.values():
-        print(seq, file=f_up)
-
-    utils.close(f_pe)
-    utils.close(f_up)
-
-
-def count_sequences(infile):
-    '''Returns the number of sequences in a file'''
-    seq_reader = sequences.file_reader(infile)
-    n = 0
-    for seq in seq_reader:
-        n += 1
-    return n
-
-
-def deinterleave(infile, outfile_1, outfile_2, fasta_out=False):
-    seq_reader = sequences.file_reader(infile)
-    f_1 = utils.open_file_write(outfile_1)
-    f_2 = utils.open_file_write(outfile_2)
-    for seq in seq_reader:
-        if fasta_out:
-            print(sequences.Fasta(seq.id, seq.seq), file=f_1)
-        else:
-            print(seq, file=f_1)
-        try:
-            next(seq_reader)
-        except StopIteration:
-            utils.close(f_1)
-            utils.close(f_2)
-            raise Error('Error getting mate for sequence. Cannot continue')
-        if fasta_out:
-            print(sequences.Fasta(seq.id, seq.seq), file=f_2)
-        else:
-            print(seq, file=f_2)
-
-    utils.close(f_1)
-    utils.close(f_2)
-
-
-def enumerate_names(infile, outfile, start_index=1, keep_illumina_suffix=False, rename_file=None):
-    seq_reader = sequences.file_reader(infile)
-    fout_seqs = utils.open_file_write(outfile)
-    counter = start_index
-
-    if keep_illumina_suffix:
-        sequence_suffixes = ['/1', '/2']
-    else:
-        sequence_suffixes = []
-
-
-    if rename_file is not None:
-        fout_rename = utils.open_file_write(rename_file)
-        print('#old\tnew', file=fout_rename)
-
-    for seq in seq_reader:
-        old_id = seq.id
-        seq.id = str(counter)
-
-        for suff in sequence_suffixes:
-            if old_id.endswith(suff):
-                seq.id += suff
-                break
-
-        if rename_file is not None:
-            print(old_id, seq.id, sep='\t', file=fout_rename)
-
-        print(seq, file=fout_seqs)
-        counter += 1
-
-    utils.close(fout_seqs)
-
-    if rename_file is not None:
-        utils.close(fout_rename)
-
-
-def extend_gaps(infile, outfile, trim):
-    seq_reader = sequences.file_reader(infile)
-    fout = utils.open_file_write(outfile)
-
-    for seq in seq_reader:
-        if len(seq) < 2 * trim:
-            continue
-
-        gaps = seq.gaps()
-        bases = list(seq.seq)
-
-        # extend the length of each gap
-        for gap in gaps:
-            left_start = max(gap.start - trim, 0)
-            right_end = min(gap.end + trim + 1, len(seq))
-
-            for i in range(left_start, gap.start):
-                bases[i] = 'N'
-
-            for i in range(gap.end, right_end):
-                bases[i] = 'N'
-
-        seq.seq = ''.join(bases)
-
-        # trim start/end bases and tidy up any resulting Ns at either end of the trimmed seq
-        seq.trim(trim, trim)
-        seq.trim_Ns()
-
-        # check that there is some non-N sequence left over
-        regex = re.compile('[^nN]')
-        if regex.search(seq.seq) is not None:
-            print(seq, file=fout)
-
-    utils.close(fout)
-
-
-def fasta_to_fastq(fasta_in, qual_in, outfile):
-    fa_reader = sequences.file_reader(fasta_in)
-    qual_reader = sequences.file_reader(qual_in, read_quals=True)
-    f_out = utils.open_file_write(outfile)
-
-    for seq in fa_reader:
-        qual = next(qual_reader)
-        if seq.id != qual.id:
-            utils.close(f_out)
-            raise Error('Mismatch in names from fasta and qual file', seq.id, qual.id)
-
-        qual.seq = [int(x) for x in qual.seq.split()]
-        print(seq.to_Fastq(qual.seq), file=f_out)
-
-    utils.close(f_out)
-
-
-def fastaq_to_mira_xml(infile, outfile):
-    seq_reader = sequences.file_reader(infile)
-    fout = utils.open_file_write(outfile)
-    print('<?xml version="1.0"?>', '<trace_volume>', sep='\n', file=fout)
-
-    for seq in seq_reader:
-        print('    <trace>',
-              '        <trace_name>' + seq.id + '</trace_name>',
-              '        <clip_quality_right>' + str(len(seq)) + '</clip_quality_right>',
-              '        <clip_vector_left>1</clip_vector_left>',
-              '    </trace>', sep='\n', file=fout)
-
-
-    print('</trace_volume>', file=fout)
-    utils.close(fout)
-
-
-def file_to_dict(infile, d):
-    seq_reader = sequences.file_reader(infile)
-    for seq in seq_reader:
-        d[seq.id] = copy.copy(seq)
-
-
-def filter(infile, outfile, minlength=0, maxlength=float('inf'), regex=None, ids_file=None, invert=False):
-    ids_from_file = set()
-    if ids_file is not None:
-        f = utils.open_file_read(ids_file)
-        for line in f:
-            ids_from_file.add(line.rstrip())
-        utils.close(f)
-
-    seq_reader = sequences.file_reader(infile)
-    f_out = utils.open_file_write(outfile)
-    if regex is not None:
-        r = re.compile(regex)
-
-    for seq in seq_reader:
-        hit = minlength <= len(seq) <= maxlength \
-              and (regex is None or r.search(seq.id) is not None) \
-              and (ids_file is None or seq.id in ids_from_file)
-
-        if hit != invert:
-            print(seq, file=f_out)
-    utils.close(f_out)
-    
-
-def get_ids(infile, outfile):
-    seq_reader = sequences.file_reader(infile)
-    f_out = utils.open_file_write(outfile)
-    for seq in seq_reader:
-        print(seq.id, file=f_out)
-    utils.close(f_out)
-    
-
-def get_seqs_flanking_gaps(infile, outfile, left, right):
-    seq_reader = sequences.file_reader(infile)
-    fout = utils.open_file_write(outfile)
-
-    print('#id', 'gap_start', 'gap_end', 'left_bases', 'right_bases', sep='\t', file=fout)
-
-    for seq in seq_reader:
-        gaps = seq.gaps()
-
-        for gap in gaps:
-            left_start = max(gap.start - left, 0)
-            right_end = min(gap.end + right + 1, len(seq))
-            print(seq.id,
-                  gap.start + 1,
-                  gap.end + 1,
-                  seq.seq[left_start:gap.start],
-                  seq.seq[gap.end + 1:right_end],
-                  sep='\t', file=fout)
-
-    utils.close(fout)
-
-
-def interleave(infile_1, infile_2, outfile):
-    seq_reader_1 = sequences.file_reader(infile_1)
-    seq_reader_2 = sequences.file_reader(infile_2)
-    f_out = utils.open_file_write(outfile)
-
-    for seq_1 in seq_reader_1:
-        try:
-            seq_2 = next(seq_reader_2)
-        except:
-            utils.close(f_out)
-            raise Error('Error getting mate for sequence', seq_1.id, ' ... cannot continue')
-
-        print(seq_1, file=f_out)
-        print(seq_2, file=f_out)
-
-    try:
-        seq_2 = next(seq_reader_2)
-    except:
-        seq_2 = None
-
-    if seq_2 is not None:
-        utils.close(f_out)
-        raise Error('Error getting mate for sequence', seq_2.id, ' ... cannot continue')
-
-    utils.close(f_out)
-
-
-def make_random_contigs(contigs, length, outfile, name_by_letters=False, prefix='', seed=None, first_number=1):
-    '''Makes a multi fasta file of random sequences, all the same length'''
-    random.seed(a=seed)
-    fout = utils.open_file_write(outfile)
-    letters = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
-    letters_index = 0
-
-    for i in range(contigs):
-        if name_by_letters:
-            name = letters[letters_index]
-            letters_index += 1
-            if letters_index == len(letters):
-                letters_index = 0
-        else:
-            name = str(i + first_number)
-
-        fa = sequences.Fasta(prefix + name, ''.join([random.choice('ACGT') for x in range(length)]))
-        print(fa, file=fout)
-
-    utils.close(fout)
-
-
-def reverse_complement(infile, outfile):
-    seq_reader = sequences.file_reader(infile)
-    fout = utils.open_file_write(outfile)
-
-    for seq in seq_reader:
-        seq.revcomp()
-        print(seq, file=fout)
-
-    utils.close(fout)
-
-
-def scaffolds_to_contigs(infile, outfile, number_contigs=False):
-    '''Makes a file of contigs from scaffolds by splitting at every N.
-       Use number_contigs=True to add .1, .2, etc onto end of each
-       contig, instead of default to append coordinates.'''
-    seq_reader = sequences.file_reader(infile)
-    fout = utils.open_file_write(outfile)
-
-    for seq in seq_reader:
-        contigs = seq.contig_coords()
-        counter = 1
-        for contig in contigs:
-            if number_contigs:
-                name = seq.id + '.' + str(counter)
-                counter += 1
-            else:
-                name = '.'.join([seq.id, str(contig.start + 1), str(contig.end + 1)])
-            print(sequences.Fasta(name, seq[contig.start:contig.end+1]), file=fout)
-
-    utils.close(fout)
-
-
-def search_for_seq(infile, outfile, search_string):
-    seq_reader = sequences.file_reader(infile)
-    fout = utils.open_file_write(outfile)
-
-    for seq in seq_reader:
-        hits = seq.search(search_string)
-        for hit in hits:
-            print(seq.id, hit[0]+1, hit[1], sep='\t', file=fout)
-
-    utils.close(fout)
-
-
-def translate(infile, outfile, frame=0):
-    seq_reader = sequences.file_reader(infile)
-    fout = utils.open_file_write(outfile)
-
-    for seq in seq_reader:
-        print(seq.translate(frame=frame), file=fout)
-
-    utils.close(fout)
-    
-
-def trim(infile, outfile, start, end):
-    seq_reader = sequences.file_reader(infile)
-    fout = utils.open_file_write(outfile)
-
-    for seq in seq_reader:
-        seq.trim(start, end)
-        if len(seq):
-            print(seq, file=fout)
-
-    utils.close(fout)
-
-
-def trim_Ns_at_end(infile, outfile):
-    seq_reader = sequences.file_reader(infile)
-    fout = utils.open_file_write(outfile)
-
-    for seq in seq_reader:
-        seq.trim_Ns()
-        if len(seq):
-            print(seq, file=fout)
-
-    utils.close(fout)
-
-
-def lengths_from_fai(fai_file, d):
-    f = utils.open_file_read(fai_file)
-    for line in f:
-        (id, length) = line.rstrip().split()[:2]
-        d[id] = int(length)
-    utils.close(f)
-
-
-def split_by_base_count(infile, outfiles_prefix, max_bases, max_seqs=None):
-    '''Splits a fasta/q file into separate files, file size determined by number of bases.
-
-    Puts <= max_bases in each split file The exception is a single sequence >=max_bases
-    is put in its own file.  This does not split sequences.
-    '''
-    seq_reader = sequences.file_reader(infile)
-    base_count = 0
-    file_count = 1
-    seq_count = 0
-    fout = None
-    if max_seqs is None:
-        max_seqs = float('inf')
-
-    for seq in seq_reader:
-        if base_count == 0:
-            fout = utils.open_file_write(outfiles_prefix + '.' + str(file_count))
-            file_count += 1
-
-        if base_count + len(seq) > max_bases or seq_count >= max_seqs:
-            if base_count == 0:
-                print(seq, file=fout)
-                utils.close(fout)
-            else:
-                utils.close(fout)
-                fout = utils.open_file_write(outfiles_prefix + '.' + str(file_count))
-                print(seq, file=fout)
-                base_count = len(seq)
-                file_count += 1
-                seq_count = 1
-        else:
-            base_count += len(seq)
-            seq_count += 1
-            print(seq, file=fout)
-
-    utils.close(fout)
-
-
-def split_by_fixed_size(infile, outfiles_prefix, chunk_size, tolerance, skip_if_all_Ns=False):
-    '''Splits  fasta/q file into separate files, with up to (chunk_size + tolerance) bases in each file'''
-    file_count = 1
-    coords = []
-    small_sequences = []  # sequences shorter than chunk_size
-    seq_reader = sequences.file_reader(infile)
-    f_coords = utils.open_file_write(outfiles_prefix + '.coords')
-
-    for seq in seq_reader:
-        if skip_if_all_Ns and seq.is_all_Ns():
-             continue
-        if len(seq) < chunk_size:
-            small_sequences.append(copy.copy(seq))
-        elif len(seq) <= chunk_size + tolerance:
-            f = utils.open_file_write(outfiles_prefix + '.' + str(file_count))
-            print(seq, file=f)
-            utils.close(f)
-            file_count += 1
-        else:
-            # make list of chunk coords
-            chunks = [(x,x+chunk_size) for x in range(0, len(seq), chunk_size)]
-            if chunks[-1][1] - 1 > len(seq):
-                chunks[-1] = (chunks[-1][0], len(seq))
-            if len(chunks) > 1 and (chunks[-1][1] - chunks[-1][0]) <= tolerance:
-                chunks[-2] = (chunks[-2][0], chunks[-1][1])
-                chunks.pop()
-
-            # write one output file per chunk
-            offset = 0
-            for chunk in chunks:
-                if not(skip_if_all_Ns and seq.is_all_Ns(start=chunk[0], end=chunk[1]-1)):
-                    f = utils.open_file_write(outfiles_prefix + '.' + str(file_count))
-                    chunk_id = seq.id + ':' + str(chunk[0]+1) + '-' + str(chunk[1])
-                    print(sequences.Fasta(chunk_id, seq[chunk[0]:chunk[1]]), file=f)
-                    print(chunk_id, seq.id, offset, sep='\t', file=f_coords)
-                    utils.close(f)
-                    file_count += 1
-
-                offset += chunk[1] - chunk[0]
-
-    # write files of small sequences
-    if len(small_sequences):
-        f = utils.open_file_write(outfiles_prefix + '.' + str(file_count))
-        file_count += 1
-        base_count = 0
-        for seq in small_sequences:
-            if base_count > 0 and base_count + len(seq) > chunk_size + tolerance:
-                utils.close(f)
-                f = utils.open_file_write(outfiles_prefix + '.' + str(file_count))
-                file_count += 1
-                base_count = 0
-              
-            print(seq, file=f)
-            base_count += len(seq)
-
-        utils.close(f)
-
-
-def replace_bases(infile, outfile, old, new):
-    seq_reader = sequences.file_reader(infile)
-    f_out = utils.open_file_write(outfile)
-
-    for seq in seq_reader:
-        seq.replace_bases(old, new)
-        print(seq, file=f_out)
-
-    utils.close(f_out)
-
-
-def strip_illumina_suffix(infile, outfile):
-    seq_reader = sequences.file_reader(infile)
-    f_out = utils.open_file_write(outfile)
-
-    for seq in seq_reader:
-        seq.strip_illumina_suffix()
-        print(seq, file=f_out)
-
-    utils.close(f_out)
-
-
-def to_fasta(infile, outfile, line_length=60, strip_after_first_whitespace=False):
-    seq_reader = sequences.file_reader(infile)
-    f_out = utils.open_file_write(outfile)
-    original_line_length = sequences.Fasta.line_length
-    sequences.Fasta.line_length = line_length
-
-    for seq in seq_reader:
-        if strip_after_first_whitespace:
-            seq.strip_after_first_whitespace()
-
-        if type(seq) == sequences.Fastq:
-            print(sequences.Fasta(seq.id, seq.seq), file=f_out)
-        else:
-            print(seq, file=f_out)
-
-    utils.close(f_out)
-    sequences.Fasta.line_length = original_line_length
-
-
-def to_quasr_primers(infile, outfile):
-    seq_reader = sequences.file_reader(infile)
-    f_out = utils.open_file_write(outfile)
-
-    for seq in seq_reader:
-        seq2 = copy.copy(seq)
-        seq2.revcomp()
-        print(seq.seq, seq2.seq, sep='\t', file=f_out)
-
-    utils.close(f_out)
-
-    
-def to_unique_by_id(infile, outfile):
-    seq_reader = sequences.file_reader(infile)
-    seqs = {}
-    ids_in_order = []
-
-    # has the reads, keeping the longest one when we get the same
-    # name more than once
-    for seq in seq_reader:
-        if len(seq) == 0:
-           continue
-        if seq.id not in seqs:
-            seqs[seq.id] = copy.copy(seq)
-            ids_in_order.append(seq.id)
-        elif len(seqs[seq.id]) < len(seq):
-            seqs[seq.id] = copy.copy(seq)
-
-    # write the output
-    f_out = utils.open_file_write(outfile)
-    for id in ids_in_order:
-        print(seqs[id], file=f_out)
-    utils.close(f_out)
diff --git a/fastaq/tests/data/sequences_test.embl b/fastaq/tests/data/sequences_test.embl
deleted file mode 100644
index b40c185..0000000
--- a/fastaq/tests/data/sequences_test.embl
+++ /dev/null
@@ -1,203 +0,0 @@
-ID   seq1; SV 1; linear; mRNA; STD; PLN; 1859 BP.
-XX
-AC   X56734; S46826;
-XX
-DT   12-SEP-1991 (Rel. 29, Created)
-DT   25-NOV-2005 (Rel. 85, Last updated, Version 11)
-XX
-DE   Trifolium repens mRNA for non-cyanogenic beta-glucosidase
-XX
-KW   beta-glucosidase.
-XX
-OS   Trifolium repens (white clover)
-OC   Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
-OC   Spermatophyta; Magnoliophyta; eudicotyledons; core eudicotyledons; rosids;
-OC   fabids; Fabales; Fabaceae; Papilionoideae; Trifolieae; Trifolium.
-XX
-RN   [5]
-RP   1-1859
-RX   DOI; 10.1007/BF00039495.
-RX   PUBMED; 1907511.
-RA   Oxtoby E., Dunn M.A., Pancoro A., Hughes M.A.;
-RT   "Nucleotide and derived amino acid sequence of the cyanogenic
-RT   beta-glucosidase (linamarase) from white clover (Trifolium repens L.)";
-RL   Plant Mol. Biol. 17(2):209-219(1991).
-XX
-RN   [6]
-RP   1-1859
-RA   Hughes M.A.;
-RT   ;
-RL   Submitted (19-NOV-1990) to the INSDC.
-RL   Hughes M.A., University of Newcastle Upon Tyne, Medical School, Newcastle
-RL   Upon Tyne, NE2 4HH, UK
-XX
-DR   EuropePMC; PMC99098; 11752244.
-XX
-FH   Key             Location/Qualifiers
-FH
-FT   source          1..1859
-FT                   /organism="Trifolium repens"
-FT                   /mol_type="mRNA"
-FT                   /clone_lib="lambda gt10"
-FT                   /clone="TRE361"
-FT                   /tissue_type="leaves"
-FT                   /db_xref="taxon:3899"
-FT   mRNA            1..1859
-FT                   /experiment="experimental evidence, no additional details
-FT                   recorded"
-FT   CDS             14..1495
-FT                   /product="beta-glucosidase"
-FT                   /EC_number="3.2.1.21"
-FT                   /note="non-cyanogenic"
-FT                   /db_xref="GOA:P26204"
-FT                   /db_xref="InterPro:IPR001360"
-FT                   /db_xref="InterPro:IPR013781"
-FT                   /db_xref="InterPro:IPR017853"
-FT                   /db_xref="InterPro:IPR018120"
-FT                   /db_xref="UniProtKB/Swiss-Prot:P26204"
-FT                   /protein_id="CAA40058.1"
-FT                   /translation="MDFIVAIFALFVISSFTITSTNAVEASTLLDIGNLSRSSFPRGFI
-FT                   FGAGSSAYQFEGAVNEGGRGPSIWDTFTHKYPEKIRDGSNADITVDQYHRYKEDVGIMK
-FT                   DQNMDSYRFSISWPRILPKGKLSGGINHEGIKYYNNLINELLANGIQPFVTLFHWDLPQ
-FT                   VLEDEYGGFLNSGVINDFRDYTDLCFKEFGDRVRYWSTLNEPWVFSNSGYALGTNAPGR
-FT                   CSASNVAKPGDSGTGPYIVTHNQILAHAEAVHVYKTKYQAYQKGKIGITLVSNWLMPLD
-FT                   DNSIPDIKAAERSLDFQFGLFMEQLTTGDYSKSMRRIVKNRLPKFSKFESSLVNGSFDF
-FT                   IGINYYSSSYISNAPSHGNAKPSYSTNPMTNISFEKHGIPLGPRAASIWIYVYPYMFIQ
-FT                   EDFEIFCYILKINITILQFSITENGMNEFNDATLPVEEALLNTYRIDYYYRHLYYIRSA
-FT                   IRAGSNVKGFYAWSFLDCNEWFAGFTVRFGLNFVD"
-XX
-SQ   Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;
-     aaacaaacca aatatggatt ttattgtagc catatttgct ctgtttgtta ttagctcatt        60
-     cacaattact tccacaaatg cagttgaagc ttctactctt cttgacatag gtaacctgag       120
-     tcggagcagt tttcctcgtg gcttcatctt tggtgctgga tcttcagcat accaatttga       180
-     aggtgcagta aacgaaggcg gtagaggacc aagtatttgg gataccttca cccataaata       240
-     tccagaaaaa ataagggatg gaagcaatgc agacatcacg gttgaccaat atcaccgcta       300
-     caaggaagat gttgggatta tgaaggatca aaatatggat tcgtatagat tctcaatctc       360
-     ttggccaaga atactcccaa agggaaagtt gagcggaggc ataaatcacg aaggaatcaa       420
-     atattacaac aaccttatca acgaactatt ggctaacggt atacaaccat ttgtaactct       480
-     ttttcattgg gatcttcccc aagtcttaga agatgagtat ggtggtttct taaactccgg       540
-     tgtaataaat gattttcgag actatacgga tctttgcttc aaggaatttg gagatagagt       600
-     gaggtattgg agtactctaa atgagccatg ggtgtttagc aattctggat atgcactagg       660
-     aacaaatgca ccaggtcgat gttcggcctc caacgtggcc aagcctggtg attctggaac       720
-     aggaccttat atagttacac acaatcaaat tcttgctcat gcagaagctg tacatgtgta       780
-     taagactaaa taccaggcat atcaaaaggg aaagataggc ataacgttgg tatctaactg       840
-     gttaatgcca cttgatgata atagcatacc agatataaag gctgccgaga gatcacttga       900
-     cttccaattt ggattgttta tggaacaatt aacaacagga gattattcta agagcatgcg       960
-     gcgtatagtt aaaaaccgat tacctaagtt ctcaaaattc gaatcaagcc tagtgaatgg      1020
-     ttcatttgat tttattggta taaactatta ctcttctagt tatattagca atgccccttc      1080
-     acatggcaat gccaaaccca gttactcaac aaatcctatg accaatattt catttgaaaa      1140
-     acatgggata cccttaggtc caagggctgc ttcaatttgg atatatgttt atccatatat      1200
-     gtttatccaa gaggacttcg agatcttttg ttacatatta aaaataaata taacaatcct      1260
-     gcaattttca atcactgaaa atggtatgaa tgaattcaac gatgcaacac ttccagtaga      1320
-     agaagctctt ttgaatactt acagaattga ttactattac cgtcacttat actacattcg      1380
-     ttctgcaatc agggctggct caaatgtgaa gggtttttac gcatggtcat ttttggactg      1440
-     taatgaatgg tttgcaggct ttactgttcg ttttggatta aactttgtag attagaaaga      1500
-     tggattaaaa aggtacccta agctttctgc ccaatggtac aagaactttc tcaaaagaaa      1560
-     ctagctagta ttattaaaag aactttgtag tagattacag tacatcgttt gaagttgagt      1620
-     tggtgcacct aattaaataa aagaggttac tcttaacata tttttaggcc attcgttgtg      1680
-     aagttgttag gctgttattt ctattatact atgttgtagt aataagtgca ttgttgtacc      1740
-     agaagctatg atcataacta taggttgatc cttcatgtat cagtttgatg ttgagaatac      1800
-     tttgaattaa aagtcttttt ttattttttt aaaaaaaaaa aaaaaaaaaa aaaaaaaaa       1859
-//
-ID   seq2; SV 1; linear; mRNA; STD; PLN; 1859 BP.
-XX
-AC   X56734; S46826;
-XX
-DT   12-SEP-1991 (Rel. 29, Created)
-DT   25-NOV-2005 (Rel. 85, Last updated, Version 11)
-XX
-DE   Trifolium repens mRNA for non-cyanogenic beta-glucosidase
-XX
-KW   beta-glucosidase.
-XX
-OS   Trifolium repens (white clover)
-OC   Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
-OC   Spermatophyta; Magnoliophyta; eudicotyledons; core eudicotyledons; rosids;
-OC   fabids; Fabales; Fabaceae; Papilionoideae; Trifolieae; Trifolium.
-XX
-RN   [5]
-RP   1-1859
-RX   DOI; 10.1007/BF00039495.
-RX   PUBMED; 1907511.
-RA   Oxtoby E., Dunn M.A., Pancoro A., Hughes M.A.;
-RT   "Nucleotide and derived amino acid sequence of the cyanogenic
-RT   beta-glucosidase (linamarase) from white clover (Trifolium repens L.)";
-RL   Plant Mol. Biol. 17(2):209-219(1991).
-XX
-RN   [6]
-RP   1-1859
-RA   Hughes M.A.;
-RT   ;
-RL   Submitted (19-NOV-1990) to the INSDC.
-RL   Hughes M.A., University of Newcastle Upon Tyne, Medical School, Newcastle
-RL   Upon Tyne, NE2 4HH, UK
-XX
-DR   EuropePMC; PMC99098; 11752244.
-XX
-FH   Key             Location/Qualifiers
-FH
-FT   source          1..1859
-FT                   /organism="Trifolium repens"
-FT                   /mol_type="mRNA"
-FT                   /clone_lib="lambda gt10"
-FT                   /clone="TRE361"
-FT                   /tissue_type="leaves"
-FT                   /db_xref="taxon:3899"
-FT   mRNA            1..1859
-FT                   /experiment="experimental evidence, no additional details
-FT                   recorded"
-FT   CDS             14..1495
-FT                   /product="beta-glucosidase"
-FT                   /EC_number="3.2.1.21"
-FT                   /note="non-cyanogenic"
-FT                   /db_xref="GOA:P26204"
-FT                   /db_xref="InterPro:IPR001360"
-FT                   /db_xref="InterPro:IPR013781"
-FT                   /db_xref="InterPro:IPR017853"
-FT                   /db_xref="InterPro:IPR018120"
-FT                   /db_xref="UniProtKB/Swiss-Prot:P26204"
-FT                   /protein_id="CAA40058.1"
-FT                   /translation="MDFIVAIFALFVISSFTITSTNAVEASTLLDIGNLSRSSFPRGFI
-FT                   FGAGSSAYQFEGAVNEGGRGPSIWDTFTHKYPEKIRDGSNADITVDQYHRYKEDVGIMK
-FT                   DQNMDSYRFSISWPRILPKGKLSGGINHEGIKYYNNLINELLANGIQPFVTLFHWDLPQ
-FT                   VLEDEYGGFLNSGVINDFRDYTDLCFKEFGDRVRYWSTLNEPWVFSNSGYALGTNAPGR
-FT                   CSASNVAKPGDSGTGPYIVTHNQILAHAEAVHVYKTKYQAYQKGKIGITLVSNWLMPLD
-FT                   DNSIPDIKAAERSLDFQFGLFMEQLTTGDYSKSMRRIVKNRLPKFSKFESSLVNGSFDF
-FT                   IGINYYSSSYISNAPSHGNAKPSYSTNPMTNISFEKHGIPLGPRAASIWIYVYPYMFIQ
-FT                   EDFEIFCYILKINITILQFSITENGMNEFNDATLPVEEALLNTYRIDYYYRHLYYIRSA
-FT                   IRAGSNVKGFYAWSFLDCNEWFAGFTVRFGLNFVD"
-XX
-SQ   Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;
-     aaacaaacca aatatggatt ttattgtagc catatttgct ctgtttgtta ttagctcatt        60
-     cacaattact tccacaaatg cagttgaagc ttctactctt cttgacatag gtaacctgag       120
-     tcggagcagt tttcctcgtg gcttcatctt tggtgctgga tcttcagcat accaatttga       180
-     aggtgcagta aacgaaggcg gtagaggacc aagtatttgg gataccttca cccataaata       240
-     tccagaaaaa ataagggatg gaagcaatgc agacatcacg gttgaccaat atcaccgcta       300
-     caaggaagat gttgggatta tgaaggatca aaatatggat tcgtatagat tctcaatctc       360
-     ttggccaaga atactcccaa agggaaagtt gagcggaggc ataaatcacg aaggaatcaa       420
-     atattacaac aaccttatca acgaactatt ggctaacggt atacaaccat ttgtaactct       480
-     ttttcattgg gatcttcccc aagtcttaga agatgagtat ggtggtttct taaactccgg       540
-     tgtaataaat gattttcgag actatacgga tctttgcttc aaggaatttg gagatagagt       600
-     gaggtattgg agtactctaa atgagccatg ggtgtttagc aattctggat atgcactagg       660
-     aacaaatgca ccaggtcgat gttcggcctc caacgtggcc aagcctggtg attctggaac       720
-     aggaccttat atagttacac acaatcaaat tcttgctcat gcagaagctg tacatgtgta       780
-     taagactaaa taccaggcat atcaaaaggg aaagataggc ataacgttgg tatctaactg       840
-     gttaatgcca cttgatgata atagcatacc agatataaag gctgccgaga gatcacttga       900
-     cttccaattt ggattgttta tggaacaatt aacaacagga gattattcta agagcatgcg       960
-     gcgtatagtt aaaaaccgat tacctaagtt ctcaaaattc gaatcaagcc tagtgaatgg      1020
-     ttcatttgat tttattggta taaactatta ctcttctagt tatattagca atgccccttc      1080
-     acatggcaat gccaaaccca gttactcaac aaatcctatg accaatattt catttgaaaa      1140
-     acatgggata cccttaggtc caagggctgc ttcaatttgg atatatgttt atccatatat      1200
-     gtttatccaa gaggacttcg agatcttttg ttacatatta aaaataaata taacaatcct      1260
-     gcaattttca atcactgaaa atggtatgaa tgaattcaac gatgcaacac ttccagtaga      1320
-     agaagctctt ttgaatactt acagaattga ttactattac cgtcacttat actacattcg      1380
-     ttctgcaatc agggctggct caaatgtgaa gggtttttac gcatggtcat ttttggactg      1440
-     taatgaatgg tttgcaggct ttactgttcg ttttggatta aactttgtag attagaaaga      1500
-     tggattaaaa aggtacccta agctttctgc ccaatggtac aagaactttc tcaaaagaaa      1560
-     ctagctagta ttattaaaag aactttgtag tagattacag tacatcgttt gaagttgagt      1620
-     tggtgcacct aattaaataa aagaggttac tcttaacata tttttaggcc attcgttgtg      1680
-     aagttgttag gctgttattt ctattatact atgttgtagt aataagtgca ttgttgtacc      1740
-     agaagctatg atcataacta taggttgatc cttcatgtat cagtttgatg ttgagaatac      1800
-     tttgaattaa aagtcttttt ttattttttt aaaaaaaaaa aaaaaaaaaa ccccccccc       1859
-//
-
diff --git a/fastaq/tests/data/sequences_test.embl.bad b/fastaq/tests/data/sequences_test.embl.bad
deleted file mode 100644
index 10ca1ab..0000000
--- a/fastaq/tests/data/sequences_test.embl.bad
+++ /dev/null
@@ -1,202 +0,0 @@
-ID   seq1; SV 1; linear; mRNA; STD; PLN; 1859 BP.
-XX
-AC   X56734; S46826;
-XX
-DT   12-SEP-1991 (Rel. 29, Created)
-DT   25-NOV-2005 (Rel. 85, Last updated, Version 11)
-XX
-DE   Trifolium repens mRNA for non-cyanogenic beta-glucosidase
-XX
-KW   beta-glucosidase.
-XX
-OS   Trifolium repens (white clover)
-OC   Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
-OC   Spermatophyta; Magnoliophyta; eudicotyledons; core eudicotyledons; rosids;
-OC   fabids; Fabales; Fabaceae; Papilionoideae; Trifolieae; Trifolium.
-XX
-RN   [5]
-RP   1-1859
-RX   DOI; 10.1007/BF00039495.
-RX   PUBMED; 1907511.
-RA   Oxtoby E., Dunn M.A., Pancoro A., Hughes M.A.;
-RT   "Nucleotide and derived amino acid sequence of the cyanogenic
-RT   beta-glucosidase (linamarase) from white clover (Trifolium repens L.)";
-RL   Plant Mol. Biol. 17(2):209-219(1991).
-XX
-RN   [6]
-RP   1-1859
-RA   Hughes M.A.;
-RT   ;
-RL   Submitted (19-NOV-1990) to the INSDC.
-RL   Hughes M.A., University of Newcastle Upon Tyne, Medical School, Newcastle
-RL   Upon Tyne, NE2 4HH, UK
-XX
-DR   EuropePMC; PMC99098; 11752244.
-XX
-FH   Key             Location/Qualifiers
-FH
-FT   source          1..1859
-FT                   /organism="Trifolium repens"
-FT                   /mol_type="mRNA"
-FT                   /clone_lib="lambda gt10"
-FT                   /clone="TRE361"
-FT                   /tissue_type="leaves"
-FT                   /db_xref="taxon:3899"
-FT   mRNA            1..1859
-FT                   /experiment="experimental evidence, no additional details
-FT                   recorded"
-FT   CDS             14..1495
-FT                   /product="beta-glucosidase"
-FT                   /EC_number="3.2.1.21"
-FT                   /note="non-cyanogenic"
-FT                   /db_xref="GOA:P26204"
-FT                   /db_xref="InterPro:IPR001360"
-FT                   /db_xref="InterPro:IPR013781"
-FT                   /db_xref="InterPro:IPR017853"
-FT                   /db_xref="InterPro:IPR018120"
-FT                   /db_xref="UniProtKB/Swiss-Prot:P26204"
-FT                   /protein_id="CAA40058.1"
-FT                   /translation="MDFIVAIFALFVISSFTITSTNAVEASTLLDIGNLSRSSFPRGFI
-FT                   FGAGSSAYQFEGAVNEGGRGPSIWDTFTHKYPEKIRDGSNADITVDQYHRYKEDVGIMK
-FT                   DQNMDSYRFSISWPRILPKGKLSGGINHEGIKYYNNLINELLANGIQPFVTLFHWDLPQ
-FT                   VLEDEYGGFLNSGVINDFRDYTDLCFKEFGDRVRYWSTLNEPWVFSNSGYALGTNAPGR
-FT                   CSASNVAKPGDSGTGPYIVTHNQILAHAEAVHVYKTKYQAYQKGKIGITLVSNWLMPLD
-FT                   DNSIPDIKAAERSLDFQFGLFMEQLTTGDYSKSMRRIVKNRLPKFSKFESSLVNGSFDF
-FT                   IGINYYSSSYISNAPSHGNAKPSYSTNPMTNISFEKHGIPLGPRAASIWIYVYPYMFIQ
-FT                   EDFEIFCYILKINITILQFSITENGMNEFNDATLPVEEALLNTYRIDYYYRHLYYIRSA
-FT                   IRAGSNVKGFYAWSFLDCNEWFAGFTVRFGLNFVD"
-XX
-SQ   Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;
-     aaacaaacca aatatggatt ttattgtagc catatttgct ctgtttgtta ttagctcatt        60
-     cacaattact tccacaaatg cagttgaagc ttctactctt cttgacatag gtaacctgag       120
-     tcggagcagt tttcctcgtg gcttcatctt tggtgctgga tcttcagcat accaatttga       180
-     aggtgcagta aacgaaggcg gtagaggacc aagtatttgg gataccttca cccataaata       240
-     tccagaaaaa ataagggatg gaagcaatgc agacatcacg gttgaccaat atcaccgcta       300
-     caaggaagat gttgggatta tgaaggatca aaatatggat tcgtatagat tctcaatctc       360
-     ttggccaaga atactcccaa agggaaagtt gagcggaggc ataaatcacg aaggaatcaa       420
-     atattacaac aaccttatca acgaactatt ggctaacggt atacaaccat ttgtaactct       480
-     ttttcattgg gatcttcccc aagtcttaga agatgagtat ggtggtttct taaactccgg       540
-     tgtaataaat gattttcgag actatacgga tctttgcttc aaggaatttg gagatagagt       600
-     gaggtattgg agtactctaa atgagccatg ggtgtttagc aattctggat atgcactagg       660
-     aacaaatgca ccaggtcgat gttcggcctc caacgtggcc aagcctggtg attctggaac       720
-     aggaccttat atagttacac acaatcaaat tcttgctcat gcagaagctg tacatgtgta       780
-     taagactaaa taccaggcat atcaaaaggg aaagataggc ataacgttgg tatctaactg       840
-     gttaatgcca cttgatgata atagcatacc agatataaag gctgccgaga gatcacttga       900
-     cttccaattt ggattgttta tggaacaatt aacaacagga gattattcta agagcatgcg       960
-     gcgtatagtt aaaaaccgat tacctaagtt ctcaaaattc gaatcaagcc tagtgaatgg      1020
-     ttcatttgat tttattggta taaactatta ctcttctagt tatattagca atgccccttc      1080
-     acatggcaat gccaaaccca gttactcaac aaatcctatg accaatattt catttgaaaa      1140
-     acatgggata cccttaggtc caagggctgc ttcaatttgg atatatgttt atccatatat      1200
-     gtttatccaa gaggacttcg agatcttttg ttacatatta aaaataaata taacaatcct      1260
-     gcaattttca atcactgaaa atggtatgaa tgaattcaac gatgcaacac ttccagtaga      1320
-     agaagctctt ttgaatactt acagaattga ttactattac cgtcacttat actacattcg      1380
-     ttctgcaatc agggctggct caaatgtgaa gggtttttac gcatggtcat ttttggactg      1440
-     taatgaatgg tttgcaggct ttactgttcg ttttggatta aactttgtag attagaaaga      1500
-     tggattaaaa aggtacccta agctttctgc ccaatggtac aagaactttc tcaaaagaaa      1560
-     ctagctagta ttattaaaag aactttgtag tagattacag tacatcgttt gaagttgagt      1620
-     tggtgcacct aattaaataa aagaggttac tcttaacata tttttaggcc attcgttgtg      1680
-     aagttgttag gctgttattt ctattatact atgttgtagt aataagtgca ttgttgtacc      1740
-     agaagctatg atcataacta taggttgatc cttcatgtat cagtttgatg ttgagaatac      1800
-     tttgaattaa aagtcttttt ttattttttt aaaaaaaaaa aaaaaaaaaa aaaaaaaaa       1859
-//
-ID   seq2; SV 1; linear; mRNA; STD; PLN; 1859 BP.
-XX
-AC   X56734; S46826;
-XX
-DT   12-SEP-1991 (Rel. 29, Created)
-DT   25-NOV-2005 (Rel. 85, Last updated, Version 11)
-XX
-DE   Trifolium repens mRNA for non-cyanogenic beta-glucosidase
-XX
-KW   beta-glucosidase.
-XX
-OS   Trifolium repens (white clover)
-OC   Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
-OC   Spermatophyta; Magnoliophyta; eudicotyledons; core eudicotyledons; rosids;
-OC   fabids; Fabales; Fabaceae; Papilionoideae; Trifolieae; Trifolium.
-XX
-RN   [5]
-RP   1-1859
-RX   DOI; 10.1007/BF00039495.
-RX   PUBMED; 1907511.
-RA   Oxtoby E., Dunn M.A., Pancoro A., Hughes M.A.;
-RT   "Nucleotide and derived amino acid sequence of the cyanogenic
-RT   beta-glucosidase (linamarase) from white clover (Trifolium repens L.)";
-RL   Plant Mol. Biol. 17(2):209-219(1991).
-XX
-RN   [6]
-RP   1-1859
-RA   Hughes M.A.;
-RT   ;
-RL   Submitted (19-NOV-1990) to the INSDC.
-RL   Hughes M.A., University of Newcastle Upon Tyne, Medical School, Newcastle
-RL   Upon Tyne, NE2 4HH, UK
-XX
-DR   EuropePMC; PMC99098; 11752244.
-XX
-FH   Key             Location/Qualifiers
-FH
-FT   source          1..1859
-FT                   /organism="Trifolium repens"
-FT                   /mol_type="mRNA"
-FT                   /clone_lib="lambda gt10"
-FT                   /clone="TRE361"
-FT                   /tissue_type="leaves"
-FT                   /db_xref="taxon:3899"
-FT   mRNA            1..1859
-FT                   /experiment="experimental evidence, no additional details
-FT                   recorded"
-FT   CDS             14..1495
-FT                   /product="beta-glucosidase"
-FT                   /EC_number="3.2.1.21"
-FT                   /note="non-cyanogenic"
-FT                   /db_xref="GOA:P26204"
-FT                   /db_xref="InterPro:IPR001360"
-FT                   /db_xref="InterPro:IPR013781"
-FT                   /db_xref="InterPro:IPR017853"
-FT                   /db_xref="InterPro:IPR018120"
-FT                   /db_xref="UniProtKB/Swiss-Prot:P26204"
-FT                   /protein_id="CAA40058.1"
-FT                   /translation="MDFIVAIFALFVISSFTITSTNAVEASTLLDIGNLSRSSFPRGFI
-FT                   FGAGSSAYQFEGAVNEGGRGPSIWDTFTHKYPEKIRDGSNADITVDQYHRYKEDVGIMK
-FT                   DQNMDSYRFSISWPRILPKGKLSGGINHEGIKYYNNLINELLANGIQPFVTLFHWDLPQ
-FT                   VLEDEYGGFLNSGVINDFRDYTDLCFKEFGDRVRYWSTLNEPWVFSNSGYALGTNAPGR
-FT                   CSASNVAKPGDSGTGPYIVTHNQILAHAEAVHVYKTKYQAYQKGKIGITLVSNWLMPLD
-FT                   DNSIPDIKAAERSLDFQFGLFMEQLTTGDYSKSMRRIVKNRLPKFSKFESSLVNGSFDF
-FT                   IGINYYSSSYISNAPSHGNAKPSYSTNPMTNISFEKHGIPLGPRAASIWIYVYPYMFIQ
-FT                   EDFEIFCYILKINITILQFSITENGMNEFNDATLPVEEALLNTYRIDYYYRHLYYIRSA
-FT                   IRAGSNVKGFYAWSFLDCNEWFAGFTVRFGLNFVD"
-XX
-     aaacaaacca aatatggatt ttattgtagc catatttgct ctgtttgtta ttagctcatt        60
-     cacaattact tccacaaatg cagttgaagc ttctactctt cttgacatag gtaacctgag       120
-     tcggagcagt tttcctcgtg gcttcatctt tggtgctgga tcttcagcat accaatttga       180
-     aggtgcagta aacgaaggcg gtagaggacc aagtatttgg gataccttca cccataaata       240
-     tccagaaaaa ataagggatg gaagcaatgc agacatcacg gttgaccaat atcaccgcta       300
-     caaggaagat gttgggatta tgaaggatca aaatatggat tcgtatagat tctcaatctc       360
-     ttggccaaga atactcccaa agggaaagtt gagcggaggc ataaatcacg aaggaatcaa       420
-     atattacaac aaccttatca acgaactatt ggctaacggt atacaaccat ttgtaactct       480
-     ttttcattgg gatcttcccc aagtcttaga agatgagtat ggtggtttct taaactccgg       540
-     tgtaataaat gattttcgag actatacgga tctttgcttc aaggaatttg gagatagagt       600
-     gaggtattgg agtactctaa atgagccatg ggtgtttagc aattctggat atgcactagg       660
-     aacaaatgca ccaggtcgat gttcggcctc caacgtggcc aagcctggtg attctggaac       720
-     aggaccttat atagttacac acaatcaaat tcttgctcat gcagaagctg tacatgtgta       780
-     taagactaaa taccaggcat atcaaaaggg aaagataggc ataacgttgg tatctaactg       840
-     gttaatgcca cttgatgata atagcatacc agatataaag gctgccgaga gatcacttga       900
-     cttccaattt ggattgttta tggaacaatt aacaacagga gattattcta agagcatgcg       960
-     gcgtatagtt aaaaaccgat tacctaagtt ctcaaaattc gaatcaagcc tagtgaatgg      1020
-     ttcatttgat tttattggta taaactatta ctcttctagt tatattagca atgccccttc      1080
-     acatggcaat gccaaaccca gttactcaac aaatcctatg accaatattt catttgaaaa      1140
-     acatgggata cccttaggtc caagggctgc ttcaatttgg atatatgttt atccatatat      1200
-     gtttatccaa gaggacttcg agatcttttg ttacatatta aaaataaata taacaatcct      1260
-     gcaattttca atcactgaaa atggtatgaa tgaattcaac gatgcaacac ttccagtaga      1320
-     agaagctctt ttgaatactt acagaattga ttactattac cgtcacttat actacattcg      1380
-     ttctgcaatc agggctggct caaatgtgaa gggtttttac gcatggtcat ttttggactg      1440
-     taatgaatgg tttgcaggct ttactgttcg ttttggatta aactttgtag attagaaaga      1500
-     tggattaaaa aggtacccta agctttctgc ccaatggtac aagaactttc tcaaaagaaa      1560
-     ctagctagta ttattaaaag aactttgtag tagattacag tacatcgttt gaagttgagt      1620
-     tggtgcacct aattaaataa aagaggttac tcttaacata tttttaggcc attcgttgtg      1680
-     aagttgttag gctgttattt ctattatact atgttgtagt aataagtgca ttgttgtacc      1740
-     agaagctatg atcataacta taggttgatc cttcatgtat cagtttgatg ttgagaatac      1800
-     tttgaattaa aagtcttttt ttattttttt aaaaaaaaaa aaaaaaaaaa ccccccccc       1859
-//
-
diff --git a/fastaq/tests/data/sequences_test.embl.bad2 b/fastaq/tests/data/sequences_test.embl.bad2
deleted file mode 100644
index 1dd59b1..0000000
--- a/fastaq/tests/data/sequences_test.embl.bad2
+++ /dev/null
@@ -1,202 +0,0 @@
-ID   seq1; SV 1; linear; mRNA; STD; PLN; 1859 BP.
-XX
-AC   X56734; S46826;
-XX
-DT   12-SEP-1991 (Rel. 29, Created)
-DT   25-NOV-2005 (Rel. 85, Last updated, Version 11)
-XX
-DE   Trifolium repens mRNA for non-cyanogenic beta-glucosidase
-XX
-KW   beta-glucosidase.
-XX
-OS   Trifolium repens (white clover)
-OC   Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
-OC   Spermatophyta; Magnoliophyta; eudicotyledons; core eudicotyledons; rosids;
-OC   fabids; Fabales; Fabaceae; Papilionoideae; Trifolieae; Trifolium.
-XX
-RN   [5]
-RP   1-1859
-RX   DOI; 10.1007/BF00039495.
-RX   PUBMED; 1907511.
-RA   Oxtoby E., Dunn M.A., Pancoro A., Hughes M.A.;
-RT   "Nucleotide and derived amino acid sequence of the cyanogenic
-RT   beta-glucosidase (linamarase) from white clover (Trifolium repens L.)";
-RL   Plant Mol. Biol. 17(2):209-219(1991).
-XX
-RN   [6]
-RP   1-1859
-RA   Hughes M.A.;
-RT   ;
-RL   Submitted (19-NOV-1990) to the INSDC.
-RL   Hughes M.A., University of Newcastle Upon Tyne, Medical School, Newcastle
-RL   Upon Tyne, NE2 4HH, UK
-XX
-DR   EuropePMC; PMC99098; 11752244.
-XX
-FH   Key             Location/Qualifiers
-FH
-FT   source          1..1859
-FT                   /organism="Trifolium repens"
-FT                   /mol_type="mRNA"
-FT                   /clone_lib="lambda gt10"
-FT                   /clone="TRE361"
-FT                   /tissue_type="leaves"
-FT                   /db_xref="taxon:3899"
-FT   mRNA            1..1859
-FT                   /experiment="experimental evidence, no additional details
-FT                   recorded"
-FT   CDS             14..1495
-FT                   /product="beta-glucosidase"
-FT                   /EC_number="3.2.1.21"
-FT                   /note="non-cyanogenic"
-FT                   /db_xref="GOA:P26204"
-FT                   /db_xref="InterPro:IPR001360"
-FT                   /db_xref="InterPro:IPR013781"
-FT                   /db_xref="InterPro:IPR017853"
-FT                   /db_xref="InterPro:IPR018120"
-FT                   /db_xref="UniProtKB/Swiss-Prot:P26204"
-FT                   /protein_id="CAA40058.1"
-FT                   /translation="MDFIVAIFALFVISSFTITSTNAVEASTLLDIGNLSRSSFPRGFI
-FT                   FGAGSSAYQFEGAVNEGGRGPSIWDTFTHKYPEKIRDGSNADITVDQYHRYKEDVGIMK
-FT                   DQNMDSYRFSISWPRILPKGKLSGGINHEGIKYYNNLINELLANGIQPFVTLFHWDLPQ
-FT                   VLEDEYGGFLNSGVINDFRDYTDLCFKEFGDRVRYWSTLNEPWVFSNSGYALGTNAPGR
-FT                   CSASNVAKPGDSGTGPYIVTHNQILAHAEAVHVYKTKYQAYQKGKIGITLVSNWLMPLD
-FT                   DNSIPDIKAAERSLDFQFGLFMEQLTTGDYSKSMRRIVKNRLPKFSKFESSLVNGSFDF
-FT                   IGINYYSSSYISNAPSHGNAKPSYSTNPMTNISFEKHGIPLGPRAASIWIYVYPYMFIQ
-FT                   EDFEIFCYILKINITILQFSITENGMNEFNDATLPVEEALLNTYRIDYYYRHLYYIRSA
-FT                   IRAGSNVKGFYAWSFLDCNEWFAGFTVRFGLNFVD"
-XX
-SQ   Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;
-     aaacaaacca aatatggatt ttattgtagc catatttgct ctgtttgtta ttagctcatt        60
-     cacaattact tccacaaatg cagttgaagc ttctactctt cttgacatag gtaacctgag       120
-     tcggagcagt tttcctcgtg gcttcatctt tggtgctgga tcttcagcat accaatttga       180
-     aggtgcagta aacgaaggcg gtagaggacc aagtatttgg gataccttca cccataaata       240
-     tccagaaaaa ataagggatg gaagcaatgc agacatcacg gttgaccaat atcaccgcta       300
-     caaggaagat gttgggatta tgaaggatca aaatatggat tcgtatagat tctcaatctc       360
-     ttggccaaga atactcccaa agggaaagtt gagcggaggc ataaatcacg aaggaatcaa       420
-     atattacaac aaccttatca acgaactatt ggctaacggt atacaaccat ttgtaactct       480
-     ttttcattgg gatcttcccc aagtcttaga agatgagtat ggtggtttct taaactccgg       540
-     tgtaataaat gattttcgag actatacgga tctttgcttc aaggaatttg gagatagagt       600
-     gaggtattgg agtactctaa atgagccatg ggtgtttagc aattctggat atgcactagg       660
-     aacaaatgca ccaggtcgat gttcggcctc caacgtggcc aagcctggtg attctggaac       720
-     aggaccttat atagttacac acaatcaaat tcttgctcat gcagaagctg tacatgtgta       780
-     taagactaaa taccaggcat atcaaaaggg aaagataggc ataacgttgg tatctaactg       840
-     gttaatgcca cttgatgata atagcatacc agatataaag gctgccgaga gatcacttga       900
-     cttccaattt ggattgttta tggaacaatt aacaacagga gattattcta agagcatgcg       960
-     gcgtatagtt aaaaaccgat tacctaagtt ctcaaaattc gaatcaagcc tagtgaatgg      1020
-     ttcatttgat tttattggta taaactatta ctcttctagt tatattagca atgccccttc      1080
-     acatggcaat gccaaaccca gttactcaac aaatcctatg accaatattt catttgaaaa      1140
-     acatgggata cccttaggtc caagggctgc ttcaatttgg atatatgttt atccatatat      1200
-     gtttatccaa gaggacttcg agatcttttg ttacatatta aaaataaata taacaatcct      1260
-     gcaattttca atcactgaaa atggtatgaa tgaattcaac gatgcaacac ttccagtaga      1320
-     agaagctctt ttgaatactt acagaattga ttactattac cgtcacttat actacattcg      1380
-     ttctgcaatc agggctggct caaatgtgaa gggtttttac gcatggtcat ttttggactg      1440
-     taatgaatgg tttgcaggct ttactgttcg ttttggatta aactttgtag attagaaaga      1500
-     tggattaaaa aggtacccta agctttctgc ccaatggtac aagaactttc tcaaaagaaa      1560
-     ctagctagta ttattaaaag aactttgtag tagattacag tacatcgttt gaagttgagt      1620
-     tggtgcacct aattaaataa aagaggttac tcttaacata tttttaggcc attcgttgtg      1680
-     aagttgttag gctgttattt ctattatact atgttgtagt aataagtgca ttgttgtacc      1740
-     agaagctatg atcataacta taggttgatc cttcatgtat cagtttgatg ttgagaatac      1800
-     tttgaattaa aagtcttttt ttattttttt aaaaaaaaaa aaaaaaaaaa aaaaaaaaa       1859
-ID   seq2; SV 1; linear; mRNA; STD; PLN; 1859 BP.
-XX
-AC   X56734; S46826;
-XX
-DT   12-SEP-1991 (Rel. 29, Created)
-DT   25-NOV-2005 (Rel. 85, Last updated, Version 11)
-XX
-DE   Trifolium repens mRNA for non-cyanogenic beta-glucosidase
-XX
-KW   beta-glucosidase.
-XX
-OS   Trifolium repens (white clover)
-OC   Eukaryota; Viridiplantae; Streptophyta; Embryophyta; Tracheophyta;
-OC   Spermatophyta; Magnoliophyta; eudicotyledons; core eudicotyledons; rosids;
-OC   fabids; Fabales; Fabaceae; Papilionoideae; Trifolieae; Trifolium.
-XX
-RN   [5]
-RP   1-1859
-RX   DOI; 10.1007/BF00039495.
-RX   PUBMED; 1907511.
-RA   Oxtoby E., Dunn M.A., Pancoro A., Hughes M.A.;
-RT   "Nucleotide and derived amino acid sequence of the cyanogenic
-RT   beta-glucosidase (linamarase) from white clover (Trifolium repens L.)";
-RL   Plant Mol. Biol. 17(2):209-219(1991).
-XX
-RN   [6]
-RP   1-1859
-RA   Hughes M.A.;
-RT   ;
-RL   Submitted (19-NOV-1990) to the INSDC.
-RL   Hughes M.A., University of Newcastle Upon Tyne, Medical School, Newcastle
-RL   Upon Tyne, NE2 4HH, UK
-XX
-DR   EuropePMC; PMC99098; 11752244.
-XX
-FH   Key             Location/Qualifiers
-FH
-FT   source          1..1859
-FT                   /organism="Trifolium repens"
-FT                   /mol_type="mRNA"
-FT                   /clone_lib="lambda gt10"
-FT                   /clone="TRE361"
-FT                   /tissue_type="leaves"
-FT                   /db_xref="taxon:3899"
-FT   mRNA            1..1859
-FT                   /experiment="experimental evidence, no additional details
-FT                   recorded"
-FT   CDS             14..1495
-FT                   /product="beta-glucosidase"
-FT                   /EC_number="3.2.1.21"
-FT                   /note="non-cyanogenic"
-FT                   /db_xref="GOA:P26204"
-FT                   /db_xref="InterPro:IPR001360"
-FT                   /db_xref="InterPro:IPR013781"
-FT                   /db_xref="InterPro:IPR017853"
-FT                   /db_xref="InterPro:IPR018120"
-FT                   /db_xref="UniProtKB/Swiss-Prot:P26204"
-FT                   /protein_id="CAA40058.1"
-FT                   /translation="MDFIVAIFALFVISSFTITSTNAVEASTLLDIGNLSRSSFPRGFI
-FT                   FGAGSSAYQFEGAVNEGGRGPSIWDTFTHKYPEKIRDGSNADITVDQYHRYKEDVGIMK
-FT                   DQNMDSYRFSISWPRILPKGKLSGGINHEGIKYYNNLINELLANGIQPFVTLFHWDLPQ
-FT                   VLEDEYGGFLNSGVINDFRDYTDLCFKEFGDRVRYWSTLNEPWVFSNSGYALGTNAPGR
-FT                   CSASNVAKPGDSGTGPYIVTHNQILAHAEAVHVYKTKYQAYQKGKIGITLVSNWLMPLD
-FT                   DNSIPDIKAAERSLDFQFGLFMEQLTTGDYSKSMRRIVKNRLPKFSKFESSLVNGSFDF
-FT                   IGINYYSSSYISNAPSHGNAKPSYSTNPMTNISFEKHGIPLGPRAASIWIYVYPYMFIQ
-FT                   EDFEIFCYILKINITILQFSITENGMNEFNDATLPVEEALLNTYRIDYYYRHLYYIRSA
-FT                   IRAGSNVKGFYAWSFLDCNEWFAGFTVRFGLNFVD"
-XX
-SQ   Sequence 1859 BP; 609 A; 314 C; 355 G; 581 T; 0 other;
-     aaacaaacca aatatggatt ttattgtagc catatttgct ctgtttgtta ttagctcatt        60
-     cacaattact tccacaaatg cagttgaagc ttctactctt cttgacatag gtaacctgag       120
-     tcggagcagt tttcctcgtg gcttcatctt tggtgctgga tcttcagcat accaatttga       180
-     aggtgcagta aacgaaggcg gtagaggacc aagtatttgg gataccttca cccataaata       240
-     tccagaaaaa ataagggatg gaagcaatgc agacatcacg gttgaccaat atcaccgcta       300
-     caaggaagat gttgggatta tgaaggatca aaatatggat tcgtatagat tctcaatctc       360
-     ttggccaaga atactcccaa agggaaagtt gagcggaggc ataaatcacg aaggaatcaa       420
-     atattacaac aaccttatca acgaactatt ggctaacggt atacaaccat ttgtaactct       480
-     ttttcattgg gatcttcccc aagtcttaga agatgagtat ggtggtttct taaactccgg       540
-     tgtaataaat gattttcgag actatacgga tctttgcttc aaggaatttg gagatagagt       600
-     gaggtattgg agtactctaa atgagccatg ggtgtttagc aattctggat atgcactagg       660
-     aacaaatgca ccaggtcgat gttcggcctc caacgtggcc aagcctggtg attctggaac       720
-     aggaccttat atagttacac acaatcaaat tcttgctcat gcagaagctg tacatgtgta       780
-     taagactaaa taccaggcat atcaaaaggg aaagataggc ataacgttgg tatctaactg       840
-     gttaatgcca cttgatgata atagcatacc agatataaag gctgccgaga gatcacttga       900
-     cttccaattt ggattgttta tggaacaatt aacaacagga gattattcta agagcatgcg       960
-     gcgtatagtt aaaaaccgat tacctaagtt ctcaaaattc gaatcaagcc tagtgaatgg      1020
-     ttcatttgat tttattggta taaactatta ctcttctagt tatattagca atgccccttc      1080
-     acatggcaat gccaaaccca gttactcaac aaatcctatg accaatattt catttgaaaa      1140
-     acatgggata cccttaggtc caagggctgc ttcaatttgg atatatgttt atccatatat      1200
-     gtttatccaa gaggacttcg agatcttttg ttacatatta aaaataaata taacaatcct      1260
-     gcaattttca atcactgaaa atggtatgaa tgaattcaac gatgcaacac ttccagtaga      1320
-     agaagctctt ttgaatactt acagaattga ttactattac cgtcacttat actacattcg      1380
-     ttctgcaatc agggctggct caaatgtgaa gggtttttac gcatggtcat ttttggactg      1440
-     taatgaatgg tttgcaggct ttactgttcg ttttggatta aactttgtag attagaaaga      1500
-     tggattaaaa aggtacccta agctttctgc ccaatggtac aagaactttc tcaaaagaaa      1560
-     ctagctagta ttattaaaag aactttgtag tagattacag tacatcgttt gaagttgagt      1620
-     tggtgcacct aattaaataa aagaggttac tcttaacata tttttaggcc attcgttgtg      1680
-     aagttgttag gctgttattt ctattatact atgttgtagt aataagtgca ttgttgtacc      1740
-     agaagctatg atcataacta taggttgatc cttcatgtat cagtttgatg ttgagaatac      1800
-     tttgaattaa aagtcttttt ttattttttt aaaaaaaaaa aaaaaaaaaa ccccccccc       1859
-//
-
diff --git a/fastaq/tests/data/sequences_test.embl.to_fasta b/fastaq/tests/data/sequences_test.embl.to_fasta
deleted file mode 100644
index 89e2230..0000000
--- a/fastaq/tests/data/sequences_test.embl.to_fasta
+++ /dev/null
@@ -1,64 +0,0 @@
->seq1
-aaacaaaccaaatatggattttattgtagccatatttgctctgtttgttattagctcatt
-cacaattacttccacaaatgcagttgaagcttctactcttcttgacataggtaacctgag
-tcggagcagttttcctcgtggcttcatctttggtgctggatcttcagcataccaatttga
-aggtgcagtaaacgaaggcggtagaggaccaagtatttgggataccttcacccataaata
-tccagaaaaaataagggatggaagcaatgcagacatcacggttgaccaatatcaccgcta
-caaggaagatgttgggattatgaaggatcaaaatatggattcgtatagattctcaatctc
-ttggccaagaatactcccaaagggaaagttgagcggaggcataaatcacgaaggaatcaa
-atattacaacaaccttatcaacgaactattggctaacggtatacaaccatttgtaactct
-ttttcattgggatcttccccaagtcttagaagatgagtatggtggtttcttaaactccgg
-tgtaataaatgattttcgagactatacggatctttgcttcaaggaatttggagatagagt
-gaggtattggagtactctaaatgagccatgggtgtttagcaattctggatatgcactagg
-aacaaatgcaccaggtcgatgttcggcctccaacgtggccaagcctggtgattctggaac
-aggaccttatatagttacacacaatcaaattcttgctcatgcagaagctgtacatgtgta
-taagactaaataccaggcatatcaaaagggaaagataggcataacgttggtatctaactg
-gttaatgccacttgatgataatagcataccagatataaaggctgccgagagatcacttga
-cttccaatttggattgtttatggaacaattaacaacaggagattattctaagagcatgcg
-gcgtatagttaaaaaccgattacctaagttctcaaaattcgaatcaagcctagtgaatgg
-ttcatttgattttattggtataaactattactcttctagttatattagcaatgccccttc
-acatggcaatgccaaacccagttactcaacaaatcctatgaccaatatttcatttgaaaa
-acatgggatacccttaggtccaagggctgcttcaatttggatatatgtttatccatatat
-gtttatccaagaggacttcgagatcttttgttacatattaaaaataaatataacaatcct
-gcaattttcaatcactgaaaatggtatgaatgaattcaacgatgcaacacttccagtaga
-agaagctcttttgaatacttacagaattgattactattaccgtcacttatactacattcg
-ttctgcaatcagggctggctcaaatgtgaagggtttttacgcatggtcatttttggactg
-taatgaatggtttgcaggctttactgttcgttttggattaaactttgtagattagaaaga
-tggattaaaaaggtaccctaagctttctgcccaatggtacaagaactttctcaaaagaaa
-ctagctagtattattaaaagaactttgtagtagattacagtacatcgtttgaagttgagt
-tggtgcacctaattaaataaaagaggttactcttaacatatttttaggccattcgttgtg
-aagttgttaggctgttatttctattatactatgttgtagtaataagtgcattgttgtacc
-agaagctatgatcataactataggttgatccttcatgtatcagtttgatgttgagaatac
-tttgaattaaaagtctttttttatttttttaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
->seq2
-aaacaaaccaaatatggattttattgtagccatatttgctctgtttgttattagctcatt
-cacaattacttccacaaatgcagttgaagcttctactcttcttgacataggtaacctgag
-tcggagcagttttcctcgtggcttcatctttggtgctggatcttcagcataccaatttga
-aggtgcagtaaacgaaggcggtagaggaccaagtatttgggataccttcacccataaata
-tccagaaaaaataagggatggaagcaatgcagacatcacggttgaccaatatcaccgcta
-caaggaagatgttgggattatgaaggatcaaaatatggattcgtatagattctcaatctc
-ttggccaagaatactcccaaagggaaagttgagcggaggcataaatcacgaaggaatcaa
-atattacaacaaccttatcaacgaactattggctaacggtatacaaccatttgtaactct
-ttttcattgggatcttccccaagtcttagaagatgagtatggtggtttcttaaactccgg
-tgtaataaatgattttcgagactatacggatctttgcttcaaggaatttggagatagagt
-gaggtattggagtactctaaatgagccatgggtgtttagcaattctggatatgcactagg
-aacaaatgcaccaggtcgatgttcggcctccaacgtggccaagcctggtgattctggaac
-aggaccttatatagttacacacaatcaaattcttgctcatgcagaagctgtacatgtgta
-taagactaaataccaggcatatcaaaagggaaagataggcataacgttggtatctaactg
-gttaatgccacttgatgataatagcataccagatataaaggctgccgagagatcacttga
-cttccaatttggattgtttatggaacaattaacaacaggagattattctaagagcatgcg
-gcgtatagttaaaaaccgattacctaagttctcaaaattcgaatcaagcctagtgaatgg
-ttcatttgattttattggtataaactattactcttctagttatattagcaatgccccttc
-acatggcaatgccaaacccagttactcaacaaatcctatgaccaatatttcatttgaaaa
-acatgggatacccttaggtccaagggctgcttcaatttggatatatgtttatccatatat
-gtttatccaagaggacttcgagatcttttgttacatattaaaaataaatataacaatcct
-gcaattttcaatcactgaaaatggtatgaatgaattcaacgatgcaacacttccagtaga
-agaagctcttttgaatacttacagaattgattactattaccgtcacttatactacattcg
-ttctgcaatcagggctggctcaaatgtgaagggtttttacgcatggtcatttttggactg
-taatgaatggtttgcaggctttactgttcgttttggattaaactttgtagattagaaaga
-tggattaaaaaggtaccctaagctttctgcccaatggtacaagaactttctcaaaagaaa
-ctagctagtattattaaaagaactttgtagtagattacagtacatcgtttgaagttgagt
-tggtgcacctaattaaataaaagaggttactcttaacatatttttaggccattcgttgtg
-aagttgttaggctgttatttctattatactatgttgtagtaataagtgcattgttgtacc
-agaagctatgatcataactataggttgatccttcatgtatcagtttgatgttgagaatac
-tttgaattaaaagtctttttttatttttttaaaaaaaaaaaaaaaaaaaaccccccccc
diff --git a/fastaq/tests/data/sequences_test.fa b/fastaq/tests/data/sequences_test.fa
deleted file mode 100644
index 22da7a3..0000000
--- a/fastaq/tests/data/sequences_test.fa
+++ /dev/null
@@ -1,19 +0,0 @@
->1
-ACGTA
->2
-A
-
-C
-GT
-
-A
-
->3
-
-
-ACGTA
->4
-ACGTA
-
-
-
diff --git a/fastaq/tests/data/sequences_test.fa.ids b/fastaq/tests/data/sequences_test.fa.ids
deleted file mode 100644
index 94ebaf9..0000000
--- a/fastaq/tests/data/sequences_test.fa.ids
+++ /dev/null
@@ -1,4 +0,0 @@
-1
-2
-3
-4
diff --git a/fastaq/tests/data/sequences_test.fa.qual b/fastaq/tests/data/sequences_test.fa.qual
deleted file mode 100644
index 435d562..0000000
--- a/fastaq/tests/data/sequences_test.fa.qual
+++ /dev/null
@@ -1,17 +0,0 @@
->1
-40 40 40
-40 40
-
->2
-40
-40
-
-40
-40 40
->3
-
-40 40 40 40 40
-
->4
-40 40 40      40 40
-
diff --git a/fastaq/tests/data/sequences_test.fa.qual.bad b/fastaq/tests/data/sequences_test.fa.qual.bad
deleted file mode 100644
index 92c8d8d..0000000
--- a/fastaq/tests/data/sequences_test.fa.qual.bad
+++ /dev/null
@@ -1,17 +0,0 @@
->1
-40 40 40
-40 40
-
->3
-40
-40
-
-40
-40 40
->3
-
-40 40 40 40 40
-
->4
-40 40 40      40 40
-
diff --git a/fastaq/tests/data/sequences_test.fasta_to_fastq.fq b/fastaq/tests/data/sequences_test.fasta_to_fastq.fq
deleted file mode 100644
index 48f7282..0000000
--- a/fastaq/tests/data/sequences_test.fasta_to_fastq.fq
+++ /dev/null
@@ -1,16 +0,0 @@
- at 1
-ACGTA
-+
-IIIII
- at 2
-ACGTA
-+
-IIIII
- at 3
-ACGTA
-+
-IIIII
- at 4
-ACGTA
-+
-IIIII
diff --git a/fastaq/tests/data/sequences_test.gbk b/fastaq/tests/data/sequences_test.gbk
deleted file mode 100644
index 40f1afb..0000000
--- a/fastaq/tests/data/sequences_test.gbk
+++ /dev/null
@@ -1,170 +0,0 @@
-LOCUS       NAME1         5028 bp    DNA             PLN       21-JUN-1999
-DEFINITION  Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p
-            (AXL2) and Rev7p (REV7) genes, complete cds.
-ACCESSION   U49845
-VERSION     U49845.1  GI:1293613
-KEYWORDS    .
-SOURCE      Saccharomyces cerevisiae (baker's yeast)
-  ORGANISM  Saccharomyces cerevisiae
-            Eukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;
-            Saccharomycetales; Saccharomycetaceae; Saccharomyces.
-REFERENCE   1  (bases 1 to 5028)
-  AUTHORS   Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.
-  TITLE     Cloning and sequence of REV7, a gene whose function is required for
-            DNA damage-induced mutagenesis in Saccharomyces cerevisiae
-  JOURNAL   Yeast 10 (11), 1503-1509 (1994)
-  PUBMED    7871890
-REFERENCE   2  (bases 1 to 5028)
-  AUTHORS   Roemer,T., Madden,K., Chang,J. and Snyder,M.
-  TITLE     Selection of axial growth sites in yeast requires Axl2p, a novel
-            plasma membrane glycoprotein
-  JOURNAL   Genes Dev. 10 (7), 777-793 (1996)
-  PUBMED    8846915
-REFERENCE   3  (bases 1 to 5028)
-  AUTHORS   Roemer,T.
-  TITLE     Direct Submission
-  JOURNAL   Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, New
-            Haven, CT, USA
-FEATURES             Location/Qualifiers
-     source          1..5028
-                     /organism="Saccharomyces cerevisiae"
-                     /db_xref="taxon:4932"
-                     /chromosome="IX"
-                     /map="9"
-     CDS             <1..206
-                     /codon_start=3
-                     /product="TCP1-beta"
-                     /protein_id="AAA98665.1"
-                     /db_xref="GI:1293614"
-                     /translation="SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEA
-                     AEVLLRVDNIIRARPRTANRQHM"
-     gene            687..3158
-                     /gene="AXL2"
-     CDS             687..3158
-                     /gene="AXL2"
-                     /note="plasma membrane glycoprotein"
-                     /codon_start=1
-                     /function="required for axial budding pattern of S.
-                     cerevisiae"
-                     /product="Axl2p"
-                     /protein_id="AAA98666.1"
-                     /db_xref="GI:1293615"
-                     /translation="MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESF
-                     TFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFN
-                     VILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNE
-                     VFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPE
-                     TSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYV
-                     YLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYG
-                     DVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQ
-                     DHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSA
-                     NATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIA
-                     CGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLN
-                     NPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQ
-                     SQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDS
-                     YGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTK
-                     HRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRL
-                     VDFSNKSNVNVGQVKDIHGRIPEML"
-     gene            complement(3300..4037)
-                     /gene="REV7"
-     CDS             complement(3300..4037)
-                     /gene="REV7"
-                     /codon_start=1
-                     /product="Rev7p"
-                     /protein_id="AAA98667.1"
-                     /db_xref="GI:1293616"
-                     /translation="MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQ
-                     FVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVD
-                     KDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNR
-                     RVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEK
-                     LISGDDKILNGVYSQYEEGESIFGSLF"
-ORIGIN
-        1 gatcctccat atacaacggt atctccacct caggtttaga tctcaacaac ggaaccattg
-       61 ccgacatgag acagttaggt atcgtcgaga gttacaagct aaaacgagca gtagtcagct
-      121 ctgcatctga agccgctgaa gttctactaa gggtggataa catcatccgt gcaagaccaa
-      181 tgccatgact cagattctaa ttttaagcta ttcaatttct ctttgatc
-//
-LOCUS       NAME2         5028 bp    DNA             PLN       21-JUN-1999
-DEFINITION  Saccharomyces cerevisiae TCP1-beta gene, partial cds, and Axl2p
-            (AXL2) and Rev7p (REV7) genes, complete cds.
-ACCESSION   U49845
-VERSION     U49845.1  GI:1293613
-KEYWORDS    .
-SOURCE      Saccharomyces cerevisiae (baker's yeast)
-  ORGANISM  Saccharomyces cerevisiae
-            Eukaryota; Fungi; Ascomycota; Saccharomycotina; Saccharomycetes;
-            Saccharomycetales; Saccharomycetaceae; Saccharomyces.
-REFERENCE   1  (bases 1 to 5028)
-  AUTHORS   Torpey,L.E., Gibbs,P.E., Nelson,J. and Lawrence,C.W.
-  TITLE     Cloning and sequence of REV7, a gene whose function is required for
-            DNA damage-induced mutagenesis in Saccharomyces cerevisiae
-  JOURNAL   Yeast 10 (11), 1503-1509 (1994)
-  PUBMED    7871890
-REFERENCE   2  (bases 1 to 5028)
-  AUTHORS   Roemer,T., Madden,K., Chang,J. and Snyder,M.
-  TITLE     Selection of axial growth sites in yeast requires Axl2p, a novel
-            plasma membrane glycoprotein
-  JOURNAL   Genes Dev. 10 (7), 777-793 (1996)
-  PUBMED    8846915
-REFERENCE   3  (bases 1 to 5028)
-  AUTHORS   Roemer,T.
-  TITLE     Direct Submission
-  JOURNAL   Submitted (22-FEB-1996) Terry Roemer, Biology, Yale University, New
-            Haven, CT, USA
-FEATURES             Location/Qualifiers
-     source          1..5028
-                     /organism="Saccharomyces cerevisiae"
-                     /db_xref="taxon:4932"
-                     /chromosome="IX"
-                     /map="9"
-     CDS             <1..206
-                     /codon_start=3
-                     /product="TCP1-beta"
-                     /protein_id="AAA98665.1"
-                     /db_xref="GI:1293614"
-                     /translation="SSIYNGISTSGLDLNNGTIADMRQLGIVESYKLKRAVVSSASEA
-                     AEVLLRVDNIIRARPRTANRQHM"
-     gene            687..3158
-                     /gene="AXL2"
-     CDS             687..3158
-                     /gene="AXL2"
-                     /note="plasma membrane glycoprotein"
-                     /codon_start=1
-                     /function="required for axial budding pattern of S.
-                     cerevisiae"
-                     /product="Axl2p"
-                     /protein_id="AAA98666.1"
-                     /db_xref="GI:1293615"
-                     /translation="MTQLQISLLLTATISLLHLVVATPYEAYPIGKQYPPVARVNESF
-                     TFQISNDTYKSSVDKTAQITYNCFDLPSWLSFDSSSRTFSGEPSSDLLSDANTTLYFN
-                     VILEGTDSADSTSLNNTYQFVVTNRPSISLSSDFNLLALLKNYGYTNGKNALKLDPNE
-                     VFNVTFDRSMFTNEESIVSYYGRSQLYNAPLPNWLFFDSGELKFTGTAPVINSAIAPE
-                     TSYSFVIIATDIEGFSAVEVEFELVIGAHQLTTSIQNSLIINVTDTGNVSYDLPLNYV
-                     YLDDDPISSDKLGSINLLDAPDWVALDNATISGSVPDELLGKNSNPANFSVSIYDTYG
-                     DVIYFNFEVVSTTDLFAISSLPNINATRGEWFSYYFLPSQFTDYVNTNVSLEFTNSSQ
-                     DHDWVKFQSSNLTLAGEVPKNFDKLSLGLKANQGSQSQELYFNIIGMDSKITHSNHSA
-                     NATSTRSSHHSTSTSSYTSSTYTAKISSTSAAATSSAPAALPAANKTSSHNKKAVAIA
-                     CGVAIPLGVILVALICFLIFWRRRRENPDDENLPHAISGPDLNNPANKPNQENATPLN
-                     NPFDDDASSYDDTSIARRLAALNTLKLDNHSATESDISSVDEKRDSLSGMNTYNDQFQ
-                     SQSKEELLAKPPVQPPESPFFDPQNRSSSVYMDSEPAVNKSWRYTGNLSPVSDIVRDS
-                     YGSQKTVDTEKLFDLEAPEKEKRTSRDVTMSSLDPWNSNISPSPVRKSVTPSPYNVTK
-                     HRNRHLQNIQDSQSGKNGITPTTMSTSSSDDFVPVKDGENFCWVHSMEPDRRPSKKRL
-                     VDFSNKSNVNVGQVKDIHGRIPEML"
-     gene            complement(3300..4037)
-                     /gene="REV7"
-     CDS             complement(3300..4037)
-                     /gene="REV7"
-                     /codon_start=1
-                     /product="Rev7p"
-                     /protein_id="AAA98667.1"
-                     /db_xref="GI:1293616"
-                     /translation="MNRWVEKWLRVYLKCYINLILFYRNVYPPQSFDYTTYQSFNLPQ
-                     FVPINRHPALIDYIEELILDVLSKLTHVYRFSICIINKKNDLCIEKYVLDFSELQHVD
-                     KDDQIITETEVFDEFRSSLNSLIMHLEKLPKVNDDTITFEAVINAIELELGHKLDRNR
-                     RVDSLEEKAEIERDSNWVKCQEDENLPDNNGFQPPKIKLTSLVGSDVGPLIIHQFSEK
-                     LISGDDKILNGVYSQYEEGESIFGSLF"
-ORIGIN
-        1 gatcctccat atacaacggt atctccacct caggtttaga tctcaacaac ggaaccattg
-       61 ccgacatgag acagttaggt atcgtcgaga gttacaagct aaaacgagca gtagtcagct
-      121 ctgcatctga agccgctgaa gttctactaa gggtggataa catcatccgt gcaagaccaa
-      181 tgccatgact cagattctaa ttttaagcta ttcaatttct ctttgaaa
-//
diff --git a/fastaq/tests/data/sequences_test.gbk.to_fasta b/fastaq/tests/data/sequences_test.gbk.to_fasta
deleted file mode 100644
index 270d9ec..0000000
--- a/fastaq/tests/data/sequences_test.gbk.to_fasta
+++ /dev/null
@@ -1,10 +0,0 @@
->NAME1
-gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattg
-ccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagct
-ctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaa
-tgccatgactcagattctaattttaagctattcaatttctctttgatc
->NAME2
-gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattg
-ccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagct
-ctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaa
-tgccatgactcagattctaattttaagctattcaatttctctttgaaa
diff --git a/fastaq/tests/data/sequences_test.line_length3.fa b/fastaq/tests/data/sequences_test.line_length3.fa
deleted file mode 100644
index a77df6d..0000000
--- a/fastaq/tests/data/sequences_test.line_length3.fa
+++ /dev/null
@@ -1,12 +0,0 @@
->1
-ACG
-TA
->2
-ACG
-TA
->3
-ACG
-TA
->4
-ACG
-TA
diff --git a/fastaq/tests/data/sequences_test_3-per-line.fa b/fastaq/tests/data/sequences_test_3-per-line.fa
deleted file mode 100644
index 01ec932..0000000
--- a/fastaq/tests/data/sequences_test_3-per-line.fa
+++ /dev/null
@@ -1,19 +0,0 @@
->ID
-A
->ID
-AA
->ID
-AAA
->ID
-AAA
-A
->ID
-AAA
-AA
->ID
-AAA
-AAA
->ID
-AAA
-AAA
-A
diff --git a/fastaq/tests/data/sequences_test_cap_to_read_pairs.fa b/fastaq/tests/data/sequences_test_cap_to_read_pairs.fa
deleted file mode 100644
index dffde8b..0000000
--- a/fastaq/tests/data/sequences_test_cap_to_read_pairs.fa
+++ /dev/null
@@ -1,16 +0,0 @@
->one.p1k
-ACGT
->one.q1k
-CCCC
->two.p1k
-A
->two.q1k
-C
->one.p1k
-TTTTTTTTTT
->three.q1k
-A
->four.x
-T
->five.p1k
-G
diff --git a/fastaq/tests/data/sequences_test_cap_to_read_pairs.fa.paired.gz b/fastaq/tests/data/sequences_test_cap_to_read_pairs.fa.paired.gz
deleted file mode 100644
index 5f98494..0000000
Binary files a/fastaq/tests/data/sequences_test_cap_to_read_pairs.fa.paired.gz and /dev/null differ
diff --git a/fastaq/tests/data/sequences_test_cap_to_read_pairs.fa.unpaired.gz b/fastaq/tests/data/sequences_test_cap_to_read_pairs.fa.unpaired.gz
deleted file mode 100644
index 2e8d705..0000000
Binary files a/fastaq/tests/data/sequences_test_cap_to_read_pairs.fa.unpaired.gz and /dev/null differ
diff --git a/fastaq/tests/data/sequences_test_deinterleaved_1.fa b/fastaq/tests/data/sequences_test_deinterleaved_1.fa
deleted file mode 100644
index cb095ce..0000000
--- a/fastaq/tests/data/sequences_test_deinterleaved_1.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->1/1
-ACGTA
->2/1
-A
diff --git a/fastaq/tests/data/sequences_test_deinterleaved_2.fa b/fastaq/tests/data/sequences_test_deinterleaved_2.fa
deleted file mode 100644
index d0017c9..0000000
--- a/fastaq/tests/data/sequences_test_deinterleaved_2.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->1/2
-ACGTA
->2/2
-C
diff --git a/fastaq/tests/data/sequences_test_deinterleaved_bad2_1.fa b/fastaq/tests/data/sequences_test_deinterleaved_bad2_1.fa
deleted file mode 100644
index 0f656b5..0000000
--- a/fastaq/tests/data/sequences_test_deinterleaved_bad2_1.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->1/1
-ACGTA
diff --git a/fastaq/tests/data/sequences_test_deinterleaved_bad2_2.fa b/fastaq/tests/data/sequences_test_deinterleaved_bad2_2.fa
deleted file mode 100644
index ad68ff1..0000000
--- a/fastaq/tests/data/sequences_test_deinterleaved_bad2_2.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->1/2
-ACGTA
->2/2
-A
diff --git a/fastaq/tests/data/sequences_test_deinterleaved_bad_1.fa b/fastaq/tests/data/sequences_test_deinterleaved_bad_1.fa
deleted file mode 100644
index cb095ce..0000000
--- a/fastaq/tests/data/sequences_test_deinterleaved_bad_1.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->1/1
-ACGTA
->2/1
-A
diff --git a/fastaq/tests/data/sequences_test_deinterleaved_bad_2.fa b/fastaq/tests/data/sequences_test_deinterleaved_bad_2.fa
deleted file mode 100644
index baf20b4..0000000
--- a/fastaq/tests/data/sequences_test_deinterleaved_bad_2.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->1/2
-ACGTA
diff --git a/fastaq/tests/data/sequences_test_empty_file b/fastaq/tests/data/sequences_test_empty_file
deleted file mode 100644
index e69de29..0000000
diff --git a/fastaq/tests/data/sequences_test_enumerate_names.fa b/fastaq/tests/data/sequences_test_enumerate_names.fa
deleted file mode 100644
index d2dce18..0000000
--- a/fastaq/tests/data/sequences_test_enumerate_names.fa
+++ /dev/null
@@ -1,8 +0,0 @@
->one/1
-A
->one/2
-C
->two/1
-G
->two/2
-T
diff --git a/fastaq/tests/data/sequences_test_enumerate_names.fa.out.keep_suffix b/fastaq/tests/data/sequences_test_enumerate_names.fa.out.keep_suffix
deleted file mode 100644
index dba3ca0..0000000
--- a/fastaq/tests/data/sequences_test_enumerate_names.fa.out.keep_suffix
+++ /dev/null
@@ -1,8 +0,0 @@
->1/1
-A
->2/2
-C
->3/1
-G
->4/2
-T
diff --git a/fastaq/tests/data/sequences_test_enumerate_names.fa.out.start.1 b/fastaq/tests/data/sequences_test_enumerate_names.fa.out.start.1
deleted file mode 100644
index 2c8d196..0000000
--- a/fastaq/tests/data/sequences_test_enumerate_names.fa.out.start.1
+++ /dev/null
@@ -1,8 +0,0 @@
->1
-A
->2
-C
->3
-G
->4
-T
diff --git a/fastaq/tests/data/sequences_test_enumerate_names.fa.out.start.1.rename_file b/fastaq/tests/data/sequences_test_enumerate_names.fa.out.start.1.rename_file
deleted file mode 100644
index 8de27ac..0000000
--- a/fastaq/tests/data/sequences_test_enumerate_names.fa.out.start.1.rename_file
+++ /dev/null
@@ -1,5 +0,0 @@
-#old	new
-one/1	1
-one/2	2
-two/1	3
-two/2	4
diff --git a/fastaq/tests/data/sequences_test_enumerate_names.fa.out.start.2 b/fastaq/tests/data/sequences_test_enumerate_names.fa.out.start.2
deleted file mode 100644
index fdb1d1f..0000000
--- a/fastaq/tests/data/sequences_test_enumerate_names.fa.out.start.2
+++ /dev/null
@@ -1,8 +0,0 @@
->2
-A
->3
-C
->4
-G
->5
-T
diff --git a/fastaq/tests/data/sequences_test_extend_gaps.fa b/fastaq/tests/data/sequences_test_extend_gaps.fa
deleted file mode 100644
index b05b56c..0000000
--- a/fastaq/tests/data/sequences_test_extend_gaps.fa
+++ /dev/null
@@ -1,8 +0,0 @@
->1
-AC
->2
-ACGTACGT
->3
-ACGTNACGT
->4
-ACGTACGTNACGTACGT
diff --git a/fastaq/tests/data/sequences_test_extend_gaps.fa.out b/fastaq/tests/data/sequences_test_extend_gaps.fa.out
deleted file mode 100644
index e1ec718..0000000
--- a/fastaq/tests/data/sequences_test_extend_gaps.fa.out
+++ /dev/null
@@ -1,4 +0,0 @@
->2
-GTAC
->4
-GTACNNNNNGTAC
diff --git a/fastaq/tests/data/sequences_test_fai_test.fa b/fastaq/tests/data/sequences_test_fai_test.fa
deleted file mode 100644
index 7c02b44..0000000
--- a/fastaq/tests/data/sequences_test_fai_test.fa
+++ /dev/null
@@ -1,8 +0,0 @@
->1
-A
->2
-AA
->3
-AAA
->4
-AAAA
diff --git a/fastaq/tests/data/sequences_test_fai_test.fa.fai b/fastaq/tests/data/sequences_test_fai_test.fa.fai
deleted file mode 100644
index 154a9d6..0000000
--- a/fastaq/tests/data/sequences_test_fai_test.fa.fai
+++ /dev/null
@@ -1,4 +0,0 @@
-1	1	3	1	2
-2	2	8	2	3
-3	3	14	3	4
-4	4	21	4	5
diff --git a/fastaq/tests/data/sequences_test_fail_no_AT.fq b/fastaq/tests/data/sequences_test_fail_no_AT.fq
deleted file mode 100644
index d1472fc..0000000
--- a/fastaq/tests/data/sequences_test_fail_no_AT.fq
+++ /dev/null
@@ -1,5 +0,0 @@
- at 1
-A
-+
-I
-NOT_AN_ at _LINE
diff --git a/fastaq/tests/data/sequences_test_fail_no_plus.fq b/fastaq/tests/data/sequences_test_fail_no_plus.fq
deleted file mode 100644
index 52b5b7c..0000000
--- a/fastaq/tests/data/sequences_test_fail_no_plus.fq
+++ /dev/null
@@ -1,4 +0,0 @@
- at A
-A
-NOT_A_+
-I
diff --git a/fastaq/tests/data/sequences_test_fail_no_qual.fq b/fastaq/tests/data/sequences_test_fail_no_qual.fq
deleted file mode 100644
index 16ca520..0000000
--- a/fastaq/tests/data/sequences_test_fail_no_qual.fq
+++ /dev/null
@@ -1,3 +0,0 @@
- at A
-A
-+
diff --git a/fastaq/tests/data/sequences_test_fail_no_seq.fq b/fastaq/tests/data/sequences_test_fail_no_seq.fq
deleted file mode 100644
index de51137..0000000
--- a/fastaq/tests/data/sequences_test_fail_no_seq.fq
+++ /dev/null
@@ -1,5 +0,0 @@
- at A
-A
-+
-I
- at B
diff --git a/fastaq/tests/data/sequences_test_fastaq_replace_bases.expected.fa b/fastaq/tests/data/sequences_test_fastaq_replace_bases.expected.fa
deleted file mode 100644
index 98e1577..0000000
--- a/fastaq/tests/data/sequences_test_fastaq_replace_bases.expected.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->1
-ACGXXXAXA
diff --git a/fastaq/tests/data/sequences_test_fastaq_replace_bases.fa b/fastaq/tests/data/sequences_test_fastaq_replace_bases.fa
deleted file mode 100644
index c33edf7..0000000
--- a/fastaq/tests/data/sequences_test_fastaq_replace_bases.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->1
-ACGTTTATA
diff --git a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.expected b/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.expected
deleted file mode 100644
index 88ce837..0000000
--- a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.expected
+++ /dev/null
@@ -1,2 +0,0 @@
-ACGT	ACGT
-AG	CT
diff --git a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.fa b/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.fa
deleted file mode 100644
index be7c130..0000000
--- a/fastaq/tests/data/sequences_test_fastaq_to_quasr_primers.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->1
-ACGT
->2
-AG
diff --git a/fastaq/tests/data/sequences_test_filter_by_ids_file.fa b/fastaq/tests/data/sequences_test_filter_by_ids_file.fa
deleted file mode 100644
index 35845b6..0000000
--- a/fastaq/tests/data/sequences_test_filter_by_ids_file.fa
+++ /dev/null
@@ -1,8 +0,0 @@
->seq1
-A
->seq2
-C
->seq3
-G
->seq4
-T
diff --git a/fastaq/tests/data/sequences_test_filter_by_ids_file.fa.filtered b/fastaq/tests/data/sequences_test_filter_by_ids_file.fa.filtered
deleted file mode 100644
index 3519900..0000000
--- a/fastaq/tests/data/sequences_test_filter_by_ids_file.fa.filtered
+++ /dev/null
@@ -1,4 +0,0 @@
->seq2
-C
->seq4
-T
diff --git a/fastaq/tests/data/sequences_test_filter_by_ids_file.fa.filtered.invert b/fastaq/tests/data/sequences_test_filter_by_ids_file.fa.filtered.invert
deleted file mode 100644
index af15a5f..0000000
--- a/fastaq/tests/data/sequences_test_filter_by_ids_file.fa.filtered.invert
+++ /dev/null
@@ -1,4 +0,0 @@
->seq1
-A
->seq3
-G
diff --git a/fastaq/tests/data/sequences_test_filter_by_ids_file.fa.ids b/fastaq/tests/data/sequences_test_filter_by_ids_file.fa.ids
deleted file mode 100644
index 486529d..0000000
--- a/fastaq/tests/data/sequences_test_filter_by_ids_file.fa.ids
+++ /dev/null
@@ -1,2 +0,0 @@
-seq4
-seq2
diff --git a/fastaq/tests/data/sequences_test_filter_by_regex.fa b/fastaq/tests/data/sequences_test_filter_by_regex.fa
deleted file mode 100644
index d2cc8eb..0000000
--- a/fastaq/tests/data/sequences_test_filter_by_regex.fa
+++ /dev/null
@@ -1,10 +0,0 @@
->1
-AAA
->a
-AAA
->a/1
-AAA
->a/2
-AAA
->b/1
-AAA
diff --git a/fastaq/tests/data/sequences_test_filter_by_regex.first-char-a.fa b/fastaq/tests/data/sequences_test_filter_by_regex.first-char-a.fa
deleted file mode 100644
index e874092..0000000
--- a/fastaq/tests/data/sequences_test_filter_by_regex.first-char-a.fa
+++ /dev/null
@@ -1,6 +0,0 @@
->a
-AAA
->a/1
-AAA
->a/2
-AAA
diff --git a/fastaq/tests/data/sequences_test_filter_by_regex.first-of-pair.fa b/fastaq/tests/data/sequences_test_filter_by_regex.first-of-pair.fa
deleted file mode 100644
index 6dd5e50..0000000
--- a/fastaq/tests/data/sequences_test_filter_by_regex.first-of-pair.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->a/1
-AAA
->b/1
-AAA
diff --git a/fastaq/tests/data/sequences_test_filter_by_regex.numeric.fa b/fastaq/tests/data/sequences_test_filter_by_regex.numeric.fa
deleted file mode 100644
index b2d5b58..0000000
--- a/fastaq/tests/data/sequences_test_filter_by_regex.numeric.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->1
-AAA
diff --git a/fastaq/tests/data/sequences_test_get_seqs_flanking_gaps.fa b/fastaq/tests/data/sequences_test_get_seqs_flanking_gaps.fa
deleted file mode 100644
index 18a368a..0000000
--- a/fastaq/tests/data/sequences_test_get_seqs_flanking_gaps.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->a
-ACGTCNGTCNNNGT
->b
-ACGTGTGTTG
diff --git a/fastaq/tests/data/sequences_test_get_seqs_flanking_gaps.fa.out b/fastaq/tests/data/sequences_test_get_seqs_flanking_gaps.fa.out
deleted file mode 100644
index ecd2305..0000000
--- a/fastaq/tests/data/sequences_test_get_seqs_flanking_gaps.fa.out
+++ /dev/null
@@ -1,3 +0,0 @@
-#id	gap_start	gap_end	left_bases	right_bases
-a	6	6	GTC	GTC
-a	10	12	GTC	GT
diff --git a/fastaq/tests/data/sequences_test_gffv3.gff b/fastaq/tests/data/sequences_test_gffv3.gff
deleted file mode 100644
index 5dab817..0000000
--- a/fastaq/tests/data/sequences_test_gffv3.gff
+++ /dev/null
@@ -1,9 +0,0 @@
-##gff-version 3
-# comment
-##sequence-region seq1 1 10
-seq1	. 	gene	3	7	.	+	.	ID=gene1;name=name1
-##FASTA
->seq1
-ACGTACGTAC
->seq2
-ACGTACGTAC
diff --git a/fastaq/tests/data/sequences_test_gffv3.gff.fasta b/fastaq/tests/data/sequences_test_gffv3.gff.fasta
deleted file mode 100644
index 3b2f48f..0000000
--- a/fastaq/tests/data/sequences_test_gffv3.gff.fasta
+++ /dev/null
@@ -1,4 +0,0 @@
->seq1
-ACACGTGACG
->seq2
-AGTACCGTAA
diff --git a/fastaq/tests/data/sequences_test_gffv3.gff.to_fasta b/fastaq/tests/data/sequences_test_gffv3.gff.to_fasta
deleted file mode 100644
index 89cfed0..0000000
--- a/fastaq/tests/data/sequences_test_gffv3.gff.to_fasta
+++ /dev/null
@@ -1,4 +0,0 @@
->seq1
-ACGTACGTAC
->seq2
-ACGTACGTAC
diff --git a/fastaq/tests/data/sequences_test_gffv3.no_FASTA_line.gff b/fastaq/tests/data/sequences_test_gffv3.no_FASTA_line.gff
deleted file mode 100644
index 8e580fa..0000000
--- a/fastaq/tests/data/sequences_test_gffv3.no_FASTA_line.gff
+++ /dev/null
@@ -1,8 +0,0 @@
-##gff-version 3
-# comment
-##sequence-region seq1 1 10
-seq1	. 	gene	3	7	.	+	.	ID=gene1;name=name1
->seq1
-ACGTACGTAC
->seq2
-ACGTACGTAC
diff --git a/fastaq/tests/data/sequences_test_gffv3.no_FASTA_line.gff.to_fasta b/fastaq/tests/data/sequences_test_gffv3.no_FASTA_line.gff.to_fasta
deleted file mode 100644
index 89cfed0..0000000
--- a/fastaq/tests/data/sequences_test_gffv3.no_FASTA_line.gff.to_fasta
+++ /dev/null
@@ -1,4 +0,0 @@
->seq1
-ACGTACGTAC
->seq2
-ACGTACGTAC
diff --git a/fastaq/tests/data/sequences_test_gffv3.no_seq.2.gff b/fastaq/tests/data/sequences_test_gffv3.no_seq.2.gff
deleted file mode 100644
index d9fda5c..0000000
--- a/fastaq/tests/data/sequences_test_gffv3.no_seq.2.gff
+++ /dev/null
@@ -1,6 +0,0 @@
-##gff-version 3
-# comment
-##sequence-region seq1 1 10
-seq1	. 	gene	3	7	.	+	.	ID=gene1;name=name1
-##FASTA
-oops
diff --git a/fastaq/tests/data/sequences_test_gffv3.no_seq.gff b/fastaq/tests/data/sequences_test_gffv3.no_seq.gff
deleted file mode 100644
index dbe2b52..0000000
--- a/fastaq/tests/data/sequences_test_gffv3.no_seq.gff
+++ /dev/null
@@ -1,4 +0,0 @@
-##gff-version 3
-# comment
-##sequence-region seq1 1 10
-seq1	. 	gene	3	7	.	+	.	ID=gene1;name=name1
diff --git a/fastaq/tests/data/sequences_test_good_file.fq b/fastaq/tests/data/sequences_test_good_file.fq
deleted file mode 100644
index 12a42bc..0000000
--- a/fastaq/tests/data/sequences_test_good_file.fq
+++ /dev/null
@@ -1,11 +0,0 @@
- at ID
-ACGTA
-+
-IIIII
-
-
-
- at ID
-ACGTA
-+blah
-IIIII
diff --git a/fastaq/tests/data/sequences_test_good_file.fq.to_fasta b/fastaq/tests/data/sequences_test_good_file.fq.to_fasta
deleted file mode 100644
index c11bdfd..0000000
--- a/fastaq/tests/data/sequences_test_good_file.fq.to_fasta
+++ /dev/null
@@ -1,4 +0,0 @@
->ID
-ACGTA
->ID
-ACGTA
diff --git a/fastaq/tests/data/sequences_test_good_file_mira.xml b/fastaq/tests/data/sequences_test_good_file_mira.xml
deleted file mode 100644
index a9fe6a2..0000000
--- a/fastaq/tests/data/sequences_test_good_file_mira.xml
+++ /dev/null
@@ -1,13 +0,0 @@
-<?xml version="1.0"?>
-<trace_volume>
-    <trace>
-        <trace_name>ID</trace_name>
-        <clip_quality_right>5</clip_quality_right>
-        <clip_vector_left>1</clip_vector_left>
-    </trace>
-    <trace>
-        <trace_name>ID</trace_name>
-        <clip_quality_right>5</clip_quality_right>
-        <clip_vector_left>1</clip_vector_left>
-    </trace>
-</trace_volume>
diff --git a/fastaq/tests/data/sequences_test_interleaved.fa b/fastaq/tests/data/sequences_test_interleaved.fa
deleted file mode 100644
index 3692716..0000000
--- a/fastaq/tests/data/sequences_test_interleaved.fa
+++ /dev/null
@@ -1,8 +0,0 @@
->1/1
-ACGTA
->1/2
-ACGTA
->2/1
-A
->2/2
-C
diff --git a/fastaq/tests/data/sequences_test_interleaved.fq b/fastaq/tests/data/sequences_test_interleaved.fq
deleted file mode 100644
index 951d5a5..0000000
--- a/fastaq/tests/data/sequences_test_interleaved.fq
+++ /dev/null
@@ -1,16 +0,0 @@
- at 1/1
-ACGTA
-+
-IIIII
- at 1/2
-ACGTA
-+
-IIIII
- at 2/1
-A
-+
-I
- at 2/2
-C
-+
-I
diff --git a/fastaq/tests/data/sequences_test_interleaved_bad.fa b/fastaq/tests/data/sequences_test_interleaved_bad.fa
deleted file mode 100644
index fef6d47..0000000
--- a/fastaq/tests/data/sequences_test_interleaved_bad.fa
+++ /dev/null
@@ -1,6 +0,0 @@
->1/1
-ACGTA
->1/2
-ACGTA
->2/1
-A
diff --git a/fastaq/tests/data/sequences_test_length_filter.fa b/fastaq/tests/data/sequences_test_length_filter.fa
deleted file mode 100644
index 7507f2e..0000000
--- a/fastaq/tests/data/sequences_test_length_filter.fa
+++ /dev/null
@@ -1,6 +0,0 @@
->3
-AAA
->4
-AAAA
->5
-AAAAA
diff --git a/fastaq/tests/data/sequences_test_length_filter.min-0.max-1.fa b/fastaq/tests/data/sequences_test_length_filter.min-0.max-1.fa
deleted file mode 100644
index e69de29..0000000
diff --git a/fastaq/tests/data/sequences_test_length_filter.min-0.max-inf.fa b/fastaq/tests/data/sequences_test_length_filter.min-0.max-inf.fa
deleted file mode 100644
index 7507f2e..0000000
--- a/fastaq/tests/data/sequences_test_length_filter.min-0.max-inf.fa
+++ /dev/null
@@ -1,6 +0,0 @@
->3
-AAA
->4
-AAAA
->5
-AAAAA
diff --git a/fastaq/tests/data/sequences_test_length_filter.min-4.max-4.fa b/fastaq/tests/data/sequences_test_length_filter.min-4.max-4.fa
deleted file mode 100644
index 15e79c4..0000000
--- a/fastaq/tests/data/sequences_test_length_filter.min-4.max-4.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->4
-AAAA
diff --git a/fastaq/tests/data/sequences_test_make_random_contigs.default.fa b/fastaq/tests/data/sequences_test_make_random_contigs.default.fa
deleted file mode 100644
index 8efafc7..0000000
--- a/fastaq/tests/data/sequences_test_make_random_contigs.default.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->1
-ACG
->2
-ACG
diff --git a/fastaq/tests/data/sequences_test_make_random_contigs.first-42.fa b/fastaq/tests/data/sequences_test_make_random_contigs.first-42.fa
deleted file mode 100644
index 67ee20a..0000000
--- a/fastaq/tests/data/sequences_test_make_random_contigs.first-42.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->42
-ACG
->43
-ACG
diff --git a/fastaq/tests/data/sequences_test_make_random_contigs.name-by-letters.fa b/fastaq/tests/data/sequences_test_make_random_contigs.name-by-letters.fa
deleted file mode 100644
index 447d3e0..0000000
--- a/fastaq/tests/data/sequences_test_make_random_contigs.name-by-letters.fa
+++ /dev/null
@@ -1,56 +0,0 @@
->A
-ACG
->B
-ACG
->C
-ACG
->D
-ACG
->E
-ACG
->F
-ACG
->G
-ACG
->H
-ACG
->I
-ACG
->J
-ACG
->K
-ACG
->L
-ACG
->M
-ACG
->N
-ACG
->O
-ACG
->P
-ACG
->Q
-ACG
->R
-ACG
->S
-ACG
->T
-ACG
->U
-ACG
->V
-ACG
->W
-ACG
->X
-ACG
->Y
-ACG
->Z
-ACG
->A
-ACG
->B
-ACG
diff --git a/fastaq/tests/data/sequences_test_make_random_contigs.prefix-p.fa b/fastaq/tests/data/sequences_test_make_random_contigs.prefix-p.fa
deleted file mode 100644
index dc68695..0000000
--- a/fastaq/tests/data/sequences_test_make_random_contigs.prefix-p.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->p1
-ACG
->p2
-ACG
diff --git a/fastaq/tests/data/sequences_test_not_a_fastaq_file b/fastaq/tests/data/sequences_test_not_a_fastaq_file
deleted file mode 100644
index da4a76b..0000000
--- a/fastaq/tests/data/sequences_test_not_a_fastaq_file
+++ /dev/null
@@ -1 +0,0 @@
-i am not a fasta or fastq file
diff --git a/fastaq/tests/data/sequences_test_one-per-line.fa b/fastaq/tests/data/sequences_test_one-per-line.fa
deleted file mode 100644
index b6e4b2f..0000000
--- a/fastaq/tests/data/sequences_test_one-per-line.fa
+++ /dev/null
@@ -1,14 +0,0 @@
->ID
-A
->ID
-AA
->ID
-AAA
->ID
-AAAA
->ID
-AAAAA
->ID
-AAAAAA
->ID
-AAAAAAA
diff --git a/fastaq/tests/data/sequences_test_phylip.interleaved b/fastaq/tests/data/sequences_test_phylip.interleaved
deleted file mode 100644
index e9a42db..0000000
--- a/fastaq/tests/data/sequences_test_phylip.interleaved
+++ /dev/null
@@ -1,8 +0,0 @@
-  3    42
-Turkey    AA-CTNGGGC ATTTCAGGGT
-Salmo_gairAAGCCTTGGC AGTGCAGGGT
-H. SapiensACCGGTTGGC CGTTCAGGGT
-
-GAGCCCGGGC AATACAGGGT AT
-GAGCCGTGGC CGGGCACGGT AT
-ACAGGTTGGC CGTTCAGGGT AA
diff --git a/fastaq/tests/data/sequences_test_phylip.interleaved.to_fasta b/fastaq/tests/data/sequences_test_phylip.interleaved.to_fasta
deleted file mode 100644
index 22dfb80..0000000
--- a/fastaq/tests/data/sequences_test_phylip.interleaved.to_fasta
+++ /dev/null
@@ -1,6 +0,0 @@
->Turkey
-AACTNGGGCATTTCAGGGTGAGCCCGGGCAATACAGGGTAT
->Salmo_gair
-AAGCCTTGGCAGTGCAGGGTGAGCCGTGGCCGGGCACGGTAT
->H. Sapiens
-ACCGGTTGGCCGTTCAGGGTACAGGTTGGCCGTTCAGGGTAA
diff --git a/fastaq/tests/data/sequences_test_phylip.interleaved2 b/fastaq/tests/data/sequences_test_phylip.interleaved2
deleted file mode 100644
index 18e8795..0000000
--- a/fastaq/tests/data/sequences_test_phylip.interleaved2
+++ /dev/null
@@ -1,7 +0,0 @@
-  3    42
-Turkey    AA-CTNGGGC ATTTCAGGGT
-Salmo_gairAAGCCTTGGC AGTGCAGGGT
-H. SapiensACCGGTTGGC CGTTCAGGGT
-GAGCCCGGGC AATACAGGGT AT
-GAGCCGTGGC CGGGCACGGT AT
-ACAGGTTGGC CGTTCAGGGT AA
diff --git a/fastaq/tests/data/sequences_test_phylip.interleaved2.to_fasta b/fastaq/tests/data/sequences_test_phylip.interleaved2.to_fasta
deleted file mode 100644
index 22dfb80..0000000
--- a/fastaq/tests/data/sequences_test_phylip.interleaved2.to_fasta
+++ /dev/null
@@ -1,6 +0,0 @@
->Turkey
-AACTNGGGCATTTCAGGGTGAGCCCGGGCAATACAGGGTAT
->Salmo_gair
-AAGCCTTGGCAGTGCAGGGTGAGCCGTGGCCGGGCACGGTAT
->H. Sapiens
-ACCGGTTGGCCGTTCAGGGTACAGGTTGGCCGTTCAGGGTAA
diff --git a/fastaq/tests/data/sequences_test_phylip.made_by_seaview b/fastaq/tests/data/sequences_test_phylip.made_by_seaview
deleted file mode 100644
index 3f7b0cf..0000000
--- a/fastaq/tests/data/sequences_test_phylip.made_by_seaview
+++ /dev/null
@@ -1,6 +0,0 @@
-2   97
-seq1        GGGGGGGGGG GGGGGGGGGG GGGGGGGGGG GGGGGGGGGG GGGGGGGGGG GGGGGGGGGG
-seq2        AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA AAAAAAAAAA
-
-            GGGGGGGGGG GGGGGGGGGG GGGGGGGGGG GGGGGGT
-            AAAAAAAAAA AAAAAAAAAA AAAAAAAAA- -AAAAAG
diff --git a/fastaq/tests/data/sequences_test_phylip.made_by_seaview.to_fasta b/fastaq/tests/data/sequences_test_phylip.made_by_seaview.to_fasta
deleted file mode 100644
index 10d8264..0000000
--- a/fastaq/tests/data/sequences_test_phylip.made_by_seaview.to_fasta
+++ /dev/null
@@ -1,6 +0,0 @@
->seq1
-GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG
-GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGT
->seq2
-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
-AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAG
diff --git a/fastaq/tests/data/sequences_test_phylip.sequential b/fastaq/tests/data/sequences_test_phylip.sequential
deleted file mode 100644
index e9ce346..0000000
--- a/fastaq/tests/data/sequences_test_phylip.sequential
+++ /dev/null
@@ -1,7 +0,0 @@
-  3    42
-Turkey    AA-CTNGGGC ATTTCAGGGT
-GAGCCCGGGC AATACAGGGT AT
-Salmo_gairAAGCCTTGGC AGTGCAGGGT
-GAGCCGTGGC CGGGCACGGT AT
-H. SapiensACCGGTTGGC CGTTCAGGGT
-ACAGGTTGGC CGTTCAGGGT AA
diff --git a/fastaq/tests/data/sequences_test_phylip.sequential.to_fasta b/fastaq/tests/data/sequences_test_phylip.sequential.to_fasta
deleted file mode 100644
index 22dfb80..0000000
--- a/fastaq/tests/data/sequences_test_phylip.sequential.to_fasta
+++ /dev/null
@@ -1,6 +0,0 @@
->Turkey
-AACTNGGGCATTTCAGGGTGAGCCCGGGCAATACAGGGTAT
->Salmo_gair
-AAGCCTTGGCAGTGCAGGGTGAGCCGTGGCCGGGCACGGTAT
->H. Sapiens
-ACCGGTTGGCCGTTCAGGGTACAGGTTGGCCGTTCAGGGTAA
diff --git a/fastaq/tests/data/sequences_test_revcomp.fa b/fastaq/tests/data/sequences_test_revcomp.fa
deleted file mode 100644
index 4d9922f..0000000
--- a/fastaq/tests/data/sequences_test_revcomp.fa
+++ /dev/null
@@ -1,8 +0,0 @@
->1
-TACGT
->2
-TACGT
->3
-TACGT
->4
-TACGT
diff --git a/fastaq/tests/data/sequences_test_search_string.fa b/fastaq/tests/data/sequences_test_search_string.fa
deleted file mode 100644
index 5dc3d2c..0000000
--- a/fastaq/tests/data/sequences_test_search_string.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq
-AAGATCTAGAGATC
diff --git a/fastaq/tests/data/sequences_test_search_string.fa.hits b/fastaq/tests/data/sequences_test_search_string.fa.hits
deleted file mode 100644
index cde92ef..0000000
--- a/fastaq/tests/data/sequences_test_search_string.fa.hits
+++ /dev/null
@@ -1,4 +0,0 @@
-seq	2	+
-seq	8	+
-seq	10	+
-seq	5	-
diff --git a/fastaq/tests/data/sequences_test_split_fixed_size.fa b/fastaq/tests/data/sequences_test_split_fixed_size.fa
deleted file mode 100644
index 8b2f4c5..0000000
--- a/fastaq/tests/data/sequences_test_split_fixed_size.fa
+++ /dev/null
@@ -1,12 +0,0 @@
->seq1
-ACGTNNNNN
->seq2
-ACGTA
->seq3
-NNNN
->seq4
-AC
->seq5
-ACG
->seq6
-A
diff --git a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.1 b/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.1
deleted file mode 100644
index a72c34d..0000000
--- a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.1
+++ /dev/null
@@ -1,2 +0,0 @@
->seq1:1-4
-ACGT
diff --git a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.2 b/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.2
deleted file mode 100644
index 6a1a218..0000000
--- a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.2
+++ /dev/null
@@ -1,2 +0,0 @@
->seq1:5-9
-NNNNN
diff --git a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.3 b/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.3
deleted file mode 100644
index 5ff6016..0000000
--- a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.3
+++ /dev/null
@@ -1,2 +0,0 @@
->seq2
-ACGTA
diff --git a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.4 b/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.4
deleted file mode 100644
index 05a52a9..0000000
--- a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.4
+++ /dev/null
@@ -1,2 +0,0 @@
->seq3
-NNNN
diff --git a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.5 b/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.5
deleted file mode 100644
index bee7218..0000000
--- a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.5
+++ /dev/null
@@ -1,4 +0,0 @@
->seq4
-AC
->seq5
-ACG
diff --git a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.6 b/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.6
deleted file mode 100644
index e17b9b7..0000000
--- a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.6
+++ /dev/null
@@ -1,2 +0,0 @@
->seq6
-A
diff --git a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.coords b/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.coords
deleted file mode 100644
index 3ed2ead..0000000
--- a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.coords
+++ /dev/null
@@ -1,2 +0,0 @@
-seq1:1-4	seq1	0
-seq1:5-9	seq1	4
diff --git a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.1 b/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.1
deleted file mode 100644
index a72c34d..0000000
--- a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.1
+++ /dev/null
@@ -1,2 +0,0 @@
->seq1:1-4
-ACGT
diff --git a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.2 b/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.2
deleted file mode 100644
index 5ff6016..0000000
--- a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.2
+++ /dev/null
@@ -1,2 +0,0 @@
->seq2
-ACGTA
diff --git a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.3 b/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.3
deleted file mode 100644
index bee7218..0000000
--- a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.3
+++ /dev/null
@@ -1,4 +0,0 @@
->seq4
-AC
->seq5
-ACG
diff --git a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.4 b/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.4
deleted file mode 100644
index e17b9b7..0000000
--- a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.4
+++ /dev/null
@@ -1,2 +0,0 @@
->seq6
-A
diff --git a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.coords b/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.coords
deleted file mode 100644
index 3934ef1..0000000
--- a/fastaq/tests/data/sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.coords
+++ /dev/null
@@ -1 +0,0 @@
-seq1:1-4	seq1	0
diff --git a/fastaq/tests/data/sequences_test_split_test.fa b/fastaq/tests/data/sequences_test_split_test.fa
deleted file mode 100644
index 7c02b44..0000000
--- a/fastaq/tests/data/sequences_test_split_test.fa
+++ /dev/null
@@ -1,8 +0,0 @@
->1
-A
->2
-AA
->3
-AAA
->4
-AAAA
diff --git a/fastaq/tests/data/sequences_test_split_test.fa.2.1 b/fastaq/tests/data/sequences_test_split_test.fa.2.1
deleted file mode 100644
index 5e9a7fe..0000000
--- a/fastaq/tests/data/sequences_test_split_test.fa.2.1
+++ /dev/null
@@ -1,2 +0,0 @@
->1
-A
diff --git a/fastaq/tests/data/sequences_test_split_test.fa.2.2 b/fastaq/tests/data/sequences_test_split_test.fa.2.2
deleted file mode 100644
index 7e79b53..0000000
--- a/fastaq/tests/data/sequences_test_split_test.fa.2.2
+++ /dev/null
@@ -1,2 +0,0 @@
->2
-AA
diff --git a/fastaq/tests/data/sequences_test_split_test.fa.2.3 b/fastaq/tests/data/sequences_test_split_test.fa.2.3
deleted file mode 100644
index 22603c9..0000000
--- a/fastaq/tests/data/sequences_test_split_test.fa.2.3
+++ /dev/null
@@ -1,2 +0,0 @@
->3
-AAA
diff --git a/fastaq/tests/data/sequences_test_split_test.fa.2.4 b/fastaq/tests/data/sequences_test_split_test.fa.2.4
deleted file mode 100644
index 15e79c4..0000000
--- a/fastaq/tests/data/sequences_test_split_test.fa.2.4
+++ /dev/null
@@ -1,2 +0,0 @@
->4
-AAAA
diff --git a/fastaq/tests/data/sequences_test_split_test.fa.3.1 b/fastaq/tests/data/sequences_test_split_test.fa.3.1
deleted file mode 100644
index 5a17cab..0000000
--- a/fastaq/tests/data/sequences_test_split_test.fa.3.1
+++ /dev/null
@@ -1,4 +0,0 @@
->1
-A
->2
-AA
diff --git a/fastaq/tests/data/sequences_test_split_test.fa.3.2 b/fastaq/tests/data/sequences_test_split_test.fa.3.2
deleted file mode 100644
index 22603c9..0000000
--- a/fastaq/tests/data/sequences_test_split_test.fa.3.2
+++ /dev/null
@@ -1,2 +0,0 @@
->3
-AAA
diff --git a/fastaq/tests/data/sequences_test_split_test.fa.3.3 b/fastaq/tests/data/sequences_test_split_test.fa.3.3
deleted file mode 100644
index 15e79c4..0000000
--- a/fastaq/tests/data/sequences_test_split_test.fa.3.3
+++ /dev/null
@@ -1,2 +0,0 @@
->4
-AAAA
diff --git a/fastaq/tests/data/sequences_test_split_test.fa.4.1 b/fastaq/tests/data/sequences_test_split_test.fa.4.1
deleted file mode 100644
index 5a17cab..0000000
--- a/fastaq/tests/data/sequences_test_split_test.fa.4.1
+++ /dev/null
@@ -1,4 +0,0 @@
->1
-A
->2
-AA
diff --git a/fastaq/tests/data/sequences_test_split_test.fa.4.2 b/fastaq/tests/data/sequences_test_split_test.fa.4.2
deleted file mode 100644
index 22603c9..0000000
--- a/fastaq/tests/data/sequences_test_split_test.fa.4.2
+++ /dev/null
@@ -1,2 +0,0 @@
->3
-AAA
diff --git a/fastaq/tests/data/sequences_test_split_test.fa.4.3 b/fastaq/tests/data/sequences_test_split_test.fa.4.3
deleted file mode 100644
index 15e79c4..0000000
--- a/fastaq/tests/data/sequences_test_split_test.fa.4.3
+++ /dev/null
@@ -1,2 +0,0 @@
->4
-AAAA
diff --git a/fastaq/tests/data/sequences_test_split_test.fa.6.1 b/fastaq/tests/data/sequences_test_split_test.fa.6.1
deleted file mode 100644
index a7fcecf..0000000
--- a/fastaq/tests/data/sequences_test_split_test.fa.6.1
+++ /dev/null
@@ -1,6 +0,0 @@
->1
-A
->2
-AA
->3
-AAA
diff --git a/fastaq/tests/data/sequences_test_split_test.fa.6.2 b/fastaq/tests/data/sequences_test_split_test.fa.6.2
deleted file mode 100644
index 15e79c4..0000000
--- a/fastaq/tests/data/sequences_test_split_test.fa.6.2
+++ /dev/null
@@ -1,2 +0,0 @@
->4
-AAAA
diff --git a/fastaq/tests/data/sequences_test_split_test.fa.6.limit2.1 b/fastaq/tests/data/sequences_test_split_test.fa.6.limit2.1
deleted file mode 100644
index 5a17cab..0000000
--- a/fastaq/tests/data/sequences_test_split_test.fa.6.limit2.1
+++ /dev/null
@@ -1,4 +0,0 @@
->1
-A
->2
-AA
diff --git a/fastaq/tests/data/sequences_test_split_test.fa.6.limit2.2 b/fastaq/tests/data/sequences_test_split_test.fa.6.limit2.2
deleted file mode 100644
index 22603c9..0000000
--- a/fastaq/tests/data/sequences_test_split_test.fa.6.limit2.2
+++ /dev/null
@@ -1,2 +0,0 @@
->3
-AAA
diff --git a/fastaq/tests/data/sequences_test_split_test.fa.6.limit2.3 b/fastaq/tests/data/sequences_test_split_test.fa.6.limit2.3
deleted file mode 100644
index 15e79c4..0000000
--- a/fastaq/tests/data/sequences_test_split_test.fa.6.limit2.3
+++ /dev/null
@@ -1,2 +0,0 @@
->4
-AAAA
diff --git a/fastaq/tests/data/sequences_test_split_test.long.fa b/fastaq/tests/data/sequences_test_split_test.long.fa
deleted file mode 100644
index 23dad6a..0000000
--- a/fastaq/tests/data/sequences_test_split_test.long.fa
+++ /dev/null
@@ -1,4 +0,0 @@
->1
-AAAAAAAA
->2
-AAAAAAAA
diff --git a/fastaq/tests/data/sequences_test_split_test.long.fa.2.1 b/fastaq/tests/data/sequences_test_split_test.long.fa.2.1
deleted file mode 100644
index ecc99e6..0000000
--- a/fastaq/tests/data/sequences_test_split_test.long.fa.2.1
+++ /dev/null
@@ -1,2 +0,0 @@
->1
-AAAAAAAA
diff --git a/fastaq/tests/data/sequences_test_split_test.long.fa.2.2 b/fastaq/tests/data/sequences_test_split_test.long.fa.2.2
deleted file mode 100644
index 2a2c6a7..0000000
--- a/fastaq/tests/data/sequences_test_split_test.long.fa.2.2
+++ /dev/null
@@ -1,2 +0,0 @@
->2
-AAAAAAAA
diff --git a/fastaq/tests/data/sequences_test_strip_after_whitespace.fa b/fastaq/tests/data/sequences_test_strip_after_whitespace.fa
deleted file mode 100644
index d394bf9..0000000
--- a/fastaq/tests/data/sequences_test_strip_after_whitespace.fa
+++ /dev/null
@@ -1,6 +0,0 @@
->seq 1
-ACGT
->seq 1 2
-ACGT
->seq
-ACGT
diff --git a/fastaq/tests/data/sequences_test_strip_after_whitespace.fa.to_fasta b/fastaq/tests/data/sequences_test_strip_after_whitespace.fa.to_fasta
deleted file mode 100644
index cde7200..0000000
--- a/fastaq/tests/data/sequences_test_strip_after_whitespace.fa.to_fasta
+++ /dev/null
@@ -1,6 +0,0 @@
->seq
-ACGT
->seq
-ACGT
->seq
-ACGT
diff --git a/fastaq/tests/data/sequences_test_strip_illumina_suffix.fq b/fastaq/tests/data/sequences_test_strip_illumina_suffix.fq
deleted file mode 100644
index 05a65a4..0000000
--- a/fastaq/tests/data/sequences_test_strip_illumina_suffix.fq
+++ /dev/null
@@ -1,12 +0,0 @@
- at one/1
-A
-+
-I
- at one/2
-A
-+
-I
- at two/3
-A
-+
-I
diff --git a/fastaq/tests/data/sequences_test_strip_illumina_suffix.fq.stripped b/fastaq/tests/data/sequences_test_strip_illumina_suffix.fq.stripped
deleted file mode 100644
index 4425cc6..0000000
--- a/fastaq/tests/data/sequences_test_strip_illumina_suffix.fq.stripped
+++ /dev/null
@@ -1,12 +0,0 @@
- at one
-A
-+
-I
- at one
-A
-+
-I
- at two/3
-A
-+
-I
diff --git a/fastaq/tests/data/sequences_test_to_unique_by_id.fa b/fastaq/tests/data/sequences_test_to_unique_by_id.fa
deleted file mode 100644
index 5b486ee..0000000
--- a/fastaq/tests/data/sequences_test_to_unique_by_id.fa
+++ /dev/null
@@ -1,11 +0,0 @@
->seq1
-AA
->seq2
-A
->seq3
-A
->seq1
-A
->seq4
->seq1
-AAA
diff --git a/fastaq/tests/data/sequences_test_to_unique_by_id.fa.out b/fastaq/tests/data/sequences_test_to_unique_by_id.fa.out
deleted file mode 100644
index 8c40ed1..0000000
--- a/fastaq/tests/data/sequences_test_to_unique_by_id.fa.out
+++ /dev/null
@@ -1,6 +0,0 @@
->seq1
-AAA
->seq2
-A
->seq3
-A
diff --git a/fastaq/tests/data/sequences_test_translate.fa b/fastaq/tests/data/sequences_test_translate.fa
deleted file mode 100644
index 62c5afc..0000000
--- a/fastaq/tests/data/sequences_test_translate.fa
+++ /dev/null
@@ -1,2 +0,0 @@
->seq
-GCAGCCGCGGCTAGAAGGCGACGCCGGCGTAACAATGACGATTGCTGTGAAGAGCAACAGGGAGGCGGGGGTCACCATATAATCATTTTATTGCTACTCCTGCTTAAAAAGATGTTCTTTCCACCCCCGCCTAGCAGTTCATCCTCGTCTACAACCACGACTTGGTACTATGTAGTCGTGGTTTAATAGTGA
diff --git a/fastaq/tests/data/sequences_test_translate.fa.frame0 b/fastaq/tests/data/sequences_test_translate.fa.frame0
deleted file mode 100644
index 0cdd1cf..0000000
--- a/fastaq/tests/data/sequences_test_translate.fa.frame0
+++ /dev/null
@@ -1,3 +0,0 @@
->seq
-AAAARRRRRRNNDDCCEEQQGGGGHHIIILLLLLLKKMFFPPPPSSSSSSTTTTWYYVVV
-V***
diff --git a/fastaq/tests/data/sequences_test_translate.fa.frame1 b/fastaq/tests/data/sequences_test_translate.fa.frame1
deleted file mode 100644
index 80a5aaa..0000000
--- a/fastaq/tests/data/sequences_test_translate.fa.frame1
+++ /dev/null
@@ -1,3 +0,0 @@
->seq
-QPRLEGDAGVTMTIAVKSNREAGVTI*SFYCYSCLKRCSFHPRLAVHPRLQPRLGTM*SW
-FNS
diff --git a/fastaq/tests/data/sequences_test_translate.fa.frame2 b/fastaq/tests/data/sequences_test_translate.fa.frame2
deleted file mode 100644
index 874a17d..0000000
--- a/fastaq/tests/data/sequences_test_translate.fa.frame2
+++ /dev/null
@@ -1,3 +0,0 @@
->seq
-SRG*KATPA*Q*RLL*RATGRRGSPYNHFIATPA*KDVLSTPA*QFILVYNHDLVLCSRG
-LIV
diff --git a/fastaq/tests/data/sequences_test_trim_Ns_at_end.fa b/fastaq/tests/data/sequences_test_trim_Ns_at_end.fa
deleted file mode 100644
index 752e880..0000000
--- a/fastaq/tests/data/sequences_test_trim_Ns_at_end.fa
+++ /dev/null
@@ -1,10 +0,0 @@
->1
-A
->2
-nNNNNNNCNNNANNNN
->3
-NNnA
->4
-AnnnNn
->5
-NNnnnNNn
diff --git a/fastaq/tests/data/sequences_test_trim_Ns_at_end.fa.trimmed b/fastaq/tests/data/sequences_test_trim_Ns_at_end.fa.trimmed
deleted file mode 100644
index f9ca62c..0000000
--- a/fastaq/tests/data/sequences_test_trim_Ns_at_end.fa.trimmed
+++ /dev/null
@@ -1,8 +0,0 @@
->1
-A
->2
-CNNNA
->3
-A
->4
-A
diff --git a/fastaq/tests/data/sequences_test_trimmed.fq b/fastaq/tests/data/sequences_test_trimmed.fq
deleted file mode 100644
index ba91557..0000000
--- a/fastaq/tests/data/sequences_test_trimmed.fq
+++ /dev/null
@@ -1,8 +0,0 @@
- at ID
-GT
-+
-II
- at ID
-GT
-+
-II
diff --git a/fastaq/tests/data/sequences_test_untrimmed.fq b/fastaq/tests/data/sequences_test_untrimmed.fq
deleted file mode 100644
index 349de37..0000000
--- a/fastaq/tests/data/sequences_test_untrimmed.fq
+++ /dev/null
@@ -1,16 +0,0 @@
- at ID
-ACGTA
-+
-IIIII
- at ID
-ACGTA
-+blah
-IIIII
-@
-NNN
-+
-III
-@
-N
-+
-I
diff --git a/fastaq/tests/data/utils_test_file_transpose.txt b/fastaq/tests/data/utils_test_file_transpose.txt
deleted file mode 100644
index 1661a3c..0000000
--- a/fastaq/tests/data/utils_test_file_transpose.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-11	12	13
-21	22	23
-31	32	33
-41	42
-51	52	53
diff --git a/fastaq/tests/data/utils_test_file_transposed.txt b/fastaq/tests/data/utils_test_file_transposed.txt
deleted file mode 100644
index 8af3080..0000000
--- a/fastaq/tests/data/utils_test_file_transposed.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-11	21	31	41	51
-12	22	32	42	52
-13	23	33	.	53
diff --git a/fastaq/tests/data/utils_test_not_really_zipped.gz b/fastaq/tests/data/utils_test_not_really_zipped.gz
deleted file mode 100644
index d81cc07..0000000
--- a/fastaq/tests/data/utils_test_not_really_zipped.gz
+++ /dev/null
@@ -1 +0,0 @@
-42
diff --git a/fastaq/tests/data/utils_test_scaffolds.fa b/fastaq/tests/data/utils_test_scaffolds.fa
deleted file mode 100644
index 5c1b14b..0000000
--- a/fastaq/tests/data/utils_test_scaffolds.fa
+++ /dev/null
@@ -1,8 +0,0 @@
->scaf1
-ACGT
->scaf2
-ACNNNGTNA
->scaf3
-NNAAAANNN
->scaf4
-NNNNNNN
diff --git a/fastaq/tests/data/utils_test_scaffolds.fa.to_contigs.fa b/fastaq/tests/data/utils_test_scaffolds.fa.to_contigs.fa
deleted file mode 100644
index a599ad2..0000000
--- a/fastaq/tests/data/utils_test_scaffolds.fa.to_contigs.fa
+++ /dev/null
@@ -1,10 +0,0 @@
->scaf1.1.4
-ACGT
->scaf2.1.2
-AC
->scaf2.6.7
-GT
->scaf2.9.9
-A
->scaf3.3.6
-AAAA
diff --git a/fastaq/tests/data/utils_test_scaffolds.fa.to_contigs.number_contigs.fa b/fastaq/tests/data/utils_test_scaffolds.fa.to_contigs.number_contigs.fa
deleted file mode 100644
index 412e4bd..0000000
--- a/fastaq/tests/data/utils_test_scaffolds.fa.to_contigs.number_contigs.fa
+++ /dev/null
@@ -1,10 +0,0 @@
->scaf1.1
-ACGT
->scaf2.1
-AC
->scaf2.2
-GT
->scaf2.3
-A
->scaf3.1
-AAAA
diff --git a/fastaq/tests/data/utils_test_system_call.txt b/fastaq/tests/data/utils_test_system_call.txt
deleted file mode 100644
index f5b2b3b..0000000
--- a/fastaq/tests/data/utils_test_system_call.txt
+++ /dev/null
@@ -1 +0,0 @@
-this is the contents of system call test file
diff --git a/fastaq/tests/intervals_test.py b/fastaq/tests/intervals_test.py
deleted file mode 100644
index c6282a0..0000000
--- a/fastaq/tests/intervals_test.py
+++ /dev/null
@@ -1,212 +0,0 @@
-#!/usr/bin/env python3
-
-import unittest
-from fastaq import intervals
-
-class TestIntervals(unittest.TestCase):
-    def test_init(self):
-        '''Throw error if try to construct genome_interval from a non-int, or end<start'''
-        with self.assertRaises(intervals.Error):
-            intervals.Interval('a', 1)
-        with self.assertRaises(intervals.Error):
-            intervals.Interval(1, 'a')
-        with self.assertRaises(intervals.Error):
-            intervals.Interval('a', 'a')
-        with self.assertRaises(intervals.Error):
-            intervals.Interval(3, 2)
-
-    def test_comparisons(self):
-        '''<, <=, == should work as expected'''
-        self.assertTrue(intervals.Interval(1,2) < intervals.Interval(2,2))
-        self.assertTrue(intervals.Interval(1,2) <= intervals.Interval(2,2))
-        self.assertFalse(intervals.Interval(2,2) <= intervals.Interval(1,2))
-        self.assertFalse(intervals.Interval(2,2) < intervals.Interval(1,2))
-        self.assertFalse(intervals.Interval(2,2) < intervals.Interval(2,2))
-        self.assertTrue(intervals.Interval(1,2) == intervals.Interval(1,2))
-        self.assertFalse(intervals.Interval(1,2) == intervals.Interval(1,3))
-        self.assertTrue(intervals.Interval(1,2) != intervals.Interval(1,3))
-        self.assertFalse(intervals.Interval(1,2) != intervals.Interval(1,2))
-
-    def test_len(self):
-        self.assertEqual(len(intervals.Interval(1,2)), 2)
-        self.assertEqual(len(intervals.Interval(1,1)), 1)
-        self.assertEqual(len(intervals.Interval(10,20)), 11)
-
-    def test_intersects(self):
-        '''Intersection of two intervals should do the right thing'''
-        a = intervals.Interval(5, 10)
-        no_intersect = [intervals.Interval(3, 4),
-                        intervals.Interval(11,20)]
-        intersect = [intervals.Interval(3,5),
-                     intervals.Interval(3,6),
-                     intervals.Interval(9,12),
-                     intervals.Interval(10,12),
-                     intervals.Interval(6,7),
-                     intervals.Interval(1,20)]
-
-        for i in no_intersect:
-            self.assertFalse(a.intersects(i), 'shouldn\'t intersect: ' + str(a) + ', ' + str(i))
-
-        for i in intersect:
-            self.assertTrue(a.intersects(i), 'should intersect: ' + str(a) + ', ' + str(i))
-
-    def test_contains(self):
-        '''Check that contains() works as expected'''
-        a = intervals.Interval(5, 10)
-        not_contained = [intervals.Interval(1,2),
-                         intervals.Interval(4,5),
-                         intervals.Interval(4,10),
-                         intervals.Interval(4,11),
-                         intervals.Interval(5,11),
-                         intervals.Interval(1,2),
-                         intervals.Interval(9,11),
-                         intervals.Interval(10,11),
-                         intervals.Interval(11,20)]
-
-
-        contained = [intervals.Interval(5,5),
-                     intervals.Interval(5,10),
-                     intervals.Interval(6,7),
-                     intervals.Interval(6,10),
-                     intervals.Interval(10,10)]
-
-        for i in not_contained:
-            self.assertFalse(a.contains(i), 'shouldn\'t contain: ' + str(a) + ', ' + str(i))
-
-        for i in contained:
-            self.assertTrue(a.contains(i), 'should contain: ' + str(a) + ', ' + str(i))
-
-    def test_union(self):
-        '''Union should either return None or the correct union'''
-        a = intervals.Interval(5, 10)
-        b = intervals.Interval(8, 15)
-        c = intervals.Interval(12, 20)
-        d = intervals.Interval(21,22)
-        self.assertEqual(a.union(c), None)
-        self.assertEqual(c.union(a), None)
-        self.assertEqual(a.union(b), intervals.Interval(5,15))
-        self.assertEqual(b.union(a), intervals.Interval(5,15))
-        self.assertEqual(c.union(d), intervals.Interval(12,22))
-        self.assertEqual(d.union(c), intervals.Interval(12,22))
-
-    def test_union_flll_gap(self):
-        '''union_fill_gap() should ignore intersections and return the maximum range of coords'''
-        a = intervals.Interval(5, 10)
-        b = intervals.Interval(8, 15)
-        c = intervals.Interval(12, 20)
-        d = intervals.Interval(21,22)
-        self.assertEqual(a.union_fill_gap(c), intervals.Interval(5,20))
-        self.assertEqual(c.union_fill_gap(a), intervals.Interval(5,20))
-        self.assertEqual(a.union_fill_gap(b), intervals.Interval(5,15))
-        self.assertEqual(b.union_fill_gap(a), intervals.Interval(5,15))
-        self.assertEqual(c.union_fill_gap(d), intervals.Interval(12,22))
-        self.assertEqual(d.union_fill_gap(c), intervals.Interval(12,22))
-
-
-    def test_intersection(self):
-        '''Intersection should either return None or the correct intersection'''
-        a = intervals.Interval(5, 10)
-        b = intervals.Interval(8, 15)
-        c = intervals.Interval(12, 20)
-        self.assertEqual(a.intersection(c), None)
-        self.assertEqual(a.intersection(b), intervals.Interval(8,10))
-
-class Test_intersection(unittest.TestCase):
-    def test_intersection(self):
-        '''intersection() should correctly intersect two lists of intervals'''
-        a = [intervals.Interval(1,2),
-             intervals.Interval(10,20),
-             intervals.Interval(51,52),
-             intervals.Interval(54,55),
-             intervals.Interval(57,58)]
-
-        b = [intervals.Interval(5,6),
-             intervals.Interval(9,11),
-             intervals.Interval(13,14),
-             intervals.Interval(17,18),
-             intervals.Interval(20,25),
-             intervals.Interval(50,60)]
-
-        c = [intervals.Interval(100,200)]
-
-        i = [intervals.Interval(10,11),
-             intervals.Interval(13,14),
-             intervals.Interval(17,18),
-             intervals.Interval(20,20),
-             intervals.Interval(51,52),
-             intervals.Interval(54,55),
-             intervals.Interval(57,58)]
-
-        self.assertSequenceEqual(intervals.intersection(a,b), i)
-        self.assertSequenceEqual(intervals.intersection(b,a), i)
-        self.assertSequenceEqual(intervals.intersection(c,a), [])
-        self.assertEqual(intervals.intersection([],a), [])
-        self.assertEqual(intervals.intersection(a,[]), [])
-
-class Test_merge_overlapping_in_list(unittest.TestCase):
-    def test_merge_overlapping_in_list(self):
-        '''merge_overlapping_in_list() merges correctly'''
-        a = [intervals.Interval(1,2),
-             intervals.Interval(51,60),
-             intervals.Interval(10,20),
-             intervals.Interval(20,30),
-             intervals.Interval(20,30),
-             intervals.Interval(29,50),
-             intervals.Interval(65,70)]
-
-        b = [intervals.Interval(1,2),
-             intervals.Interval(10,60),
-             intervals.Interval(65,70)]
-
-        intervals.merge_overlapping_in_list(a)
-        self.assertSequenceEqual(a, b)
-
-class Test_remove_contained_in_list(unittest.TestCase):
-    def test_remove_contained_in_list(self):
-        '''test_remove_contained_in_list removes the right elements of list'''
-        a = [intervals.Interval(1,2),
-             intervals.Interval(4,4),
-             intervals.Interval(4,5),
-             intervals.Interval(5,6),
-             intervals.Interval(7,9),
-             intervals.Interval(8,10),
-             intervals.Interval(9,11),
-             intervals.Interval(20,25),
-             intervals.Interval(20,24),
-             intervals.Interval(20,26),
-             intervals.Interval(30,38),
-             intervals.Interval(30,37),
-             intervals.Interval(30,36),
-             intervals.Interval(30,35),
-             intervals.Interval(30,35),
-             intervals.Interval(32,33),
-             intervals.Interval(38,50),
-             intervals.Interval(65,70),
-             intervals.Interval(67,70)]
-
-        b = [intervals.Interval(1,2),
-             intervals.Interval(4,5),
-             intervals.Interval(5,6),
-             intervals.Interval(7,9),
-             intervals.Interval(8,10),
-             intervals.Interval(9,11),
-             intervals.Interval(20,26),
-             intervals.Interval(30,38),
-             intervals.Interval(38,50),
-             intervals.Interval(65,70)]
-
-        intervals.remove_contained_in_list(a)
-        self.assertSequenceEqual(a, b)
-
-class Test_length_sum_from_list(unittest.TestCase):
-    def test_length_sum_from_list(self):
-        '''Test that total length of intervals is summed correctly'''
-        a = [intervals.Interval(1,2),
-             intervals.Interval(4,5),
-             intervals.Interval(10,19)]
-
-        self.assertEqual(14, intervals.length_sum_from_list(a))
-
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/fastaq/tests/sequences_test.py b/fastaq/tests/sequences_test.py
deleted file mode 100644
index 4bafb66..0000000
--- a/fastaq/tests/sequences_test.py
+++ /dev/null
@@ -1,535 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-import filecmp
-import os
-import unittest
-from fastaq import sequences, utils, intervals
-
-modules_dir = os.path.dirname(os.path.abspath(sequences.__file__))
-data_dir = os.path.join(modules_dir, 'tests', 'data')
-
-class Error (Exception): pass
-
-expected_embl = [
-    'aaacaaaccaaatatggattttattgtagccatatttgctctgtttgttattagctcattcacaattacttccacaaatgcagttgaagcttctactcttcttgacataggtaacctgagtcggagcagttttcctcgtggcttcatctttggtgctggatcttcagcataccaatttgaaggtgcagtaaacgaaggcggtagaggaccaagtatttgggataccttcacccataaatatccagaaaaaataagggatggaagcaatgcagacatcacggttgaccaatatcaccgctacaaggaagatgttgggattatgaaggatcaaaatatggattcgtatagattctcaatctcttggccaagaatactcccaaagggaaagttgagcggaggcataaatcacgaaggaatcaaatattacaacaaccttatcaacgaactattggctaacggtatacaaccatttgtaactctttttcat [...]
-    'aaacaaaccaaatatggattttattgtagccatatttgctctgtttgttattagctcattcacaattacttccacaaatgcagttgaagcttctactcttcttgacataggtaacctgagtcggagcagttttcctcgtggcttcatctttggtgctggatcttcagcataccaatttgaaggtgcagtaaacgaaggcggtagaggaccaagtatttgggataccttcacccataaatatccagaaaaaataagggatggaagcaatgcagacatcacggttgaccaatatcaccgctacaaggaagatgttgggattatgaaggatcaaaatatggattcgtatagattctcaatctcttggccaagaatactcccaaagggaaagttgagcggaggcataaatcacgaaggaatcaaatattacaacaaccttatcaacgaactattggctaacggtatacaaccatttgtaactctttttcat [...]
-]
-class TestFasta(unittest.TestCase):
-    def setUp(self):
-        self.fasta = sequences.Fasta('ID', 'ACGTA')
-
-    def test_equality(self):
-        self.assertTrue(self.fasta == sequences.Fasta('ID', 'ACGTA'))
-        self.assertFalse(self.fasta == sequences.Fasta('I', 'ACGTA'))
-        self.assertFalse(self.fasta == sequences.Fasta('ID', 'ACGT'))
-        self.assertFalse(self.fasta != sequences.Fasta('ID', 'ACGTA'))
-        self.assertTrue(self.fasta != sequences.Fasta('I', 'ACGTA'))
-        self.assertTrue(self.fasta != sequences.Fasta('ID', 'ACGT'))
-
-    def test_init(self):
-        '''__init__ should get the ID and sequence correctly'''
-        self.assertEqual(self.fasta.id, 'ID')
-        self.assertEqual(self.fasta.seq, 'ACGTA')
-
-    def test_get_next_from_file(self):
-        '''get_next_from_file() should read seqs from OK, including weirdness in file'''
-        f_in = utils.open_file_read(os.path.join(data_dir, 'sequences_test.fa'))
-        fa = sequences.Fasta()
-        counter = 1
-
-        while fa.get_next_from_file(f_in):
-            self.assertEqual(fa, sequences.Fasta(str(counter), 'ACGTA'))
-            counter += 1
-
-        utils.close(f_in)
-
-    def test_get_id_from_header_line(self):
-        '''Check that can get ID from header line or die properly'''
-        self.assertEqual(sequences.Fasta._get_id_from_header_line(self.fasta, '>X'), 'X')
-        with self.assertRaises(sequences.Error):
-            self.assertEqual(sequences.Fasta._get_id_from_header_line(self.fasta, 'X'), 'X')
-
-    def test_getitem(self):
-        '''getitem() should return the right subsequence'''
-        seq = 'AACGTGTCA'
-        fa = sequences.Fasta('x', seq)
-        self.assertEqual(seq[1], fa[1])
-        self.assertEqual(seq[0:2], fa[0:2])
-        self.assertEqual(seq[1:], fa[1:])
-
-    def test_len(self):
-        '''len() should return the length of the sequence'''
-        self.assertEqual(5, len(self.fasta))
-
-    def test_print_line_length(self):
-        '''__str__ should be formatted correctly with the right number of chars per line of sequence'''
-        line_lengths = [0, 3]
-        correct_files = [os.path.join(data_dir, x) for x in ['sequences_test_one-per-line.fa', 'sequences_test_3-per-line.fa']]
-
-        for i in range(len(line_lengths)):
-            seq_reader = sequences.file_reader(os.path.join(data_dir, 'sequences_test_one-per-line.fa'))
-            sequences.Fasta.line_length = line_lengths[i]
-            tmp_out = 'tmp.line_length_test.fa'
-            f = utils.open_file_write(tmp_out)
-            for s in seq_reader:
-                print(s, file=f)
-            utils.close(f)
-            self.assertTrue(filecmp.cmp(correct_files[i], tmp_out))
-            os.unlink(tmp_out)
-
-        sequences.Fasta.line_length = 60
-
-    def test_strip_after_first_whitespace(self):
-        '''Test strip_after_first_whitespace()'''
-        seqs = [
-            sequences.Fasta('name', 'A'),
-            sequences.Fasta('name foo', 'A'),
-            sequences.Fasta('name foo bar', 'A'),
-            sequences.Fasta('name\tfoo', 'A'),
-        ]
-
-        for seq in seqs:
-            seq.strip_after_first_whitespace()
-
-        for seq in seqs:
-            self.assertEqual(seq.id, 'name')
-
-    def test_strip_illumina_suffix(self):
-        '''Check that /1 and /2 removed correctly from IDs'''
-        seqs = [sequences.Fasta('name/1', 'A'),
-                sequences.Fasta('name/2', 'A'),
-                sequences.Fasta('name', 'A'),
-                sequences.Fasta('name/1/2', 'A'),
-                sequences.Fasta('name/2/1', 'A'),
-                sequences.Fasta('name/3', 'A')]
-
-        correct_names = ['name', 'name', 'name', 'name/1', 'name/2', 'name/3']
-
-        for seq in seqs:
-            seq.strip_illumina_suffix()
-
-        for i in range(len(seqs)):
-            self.assertEqual(seqs[i].id, correct_names[i])
-
-    def test_revcomp(self):
-        '''revcomp() should correctly reverse complement a sequence'''
-        fa = sequences.Fasta('ID', 'ACGTNacgtn')
-        fa.revcomp()
-        self.assertEqual(fa, sequences.Fasta('ID', 'nacgtNACGT'))
-
-    def test_gaps(self):
-        '''gaps() should find the gaps in a sequence correctly'''
-        test_seqs = [sequences.Fasta('ID', 'ACGT'),
-                     sequences.Fasta('ID', 'NACGT'),
-                     sequences.Fasta('ID', 'NACGTN'),
-                     sequences.Fasta('ID', 'ANNCGT'),
-                     sequences.Fasta('ID', 'NANNCGTNN')]
-
-        correct_gaps = [[],
-                        [intervals.Interval(0, 0)],
-                        [intervals.Interval(0, 0), intervals.Interval(5, 5)],
-                        [intervals.Interval(1, 2)],
-                        [intervals.Interval(0, 0), intervals.Interval(2, 3), intervals.Interval(7, 8)]]
-
-        for i in range(len(test_seqs)):
-            gaps = test_seqs[i].gaps()
-            self.assertListEqual(correct_gaps[i], gaps)
-
-    def test_contig_coords(self):
-        '''contig_coords() should get the coords of all contigs in a sequence correctly'''
-        test_seqs = [sequences.Fasta('ID', 'ACGT'),
-                     sequences.Fasta('ID', 'NACGT'),
-                     sequences.Fasta('ID', 'NNACGT'),
-                     sequences.Fasta('ID', 'ACGTN'),
-                     sequences.Fasta('ID', 'ACGTNN'),
-                     sequences.Fasta('ID', 'NANNCGT'),
-                     sequences.Fasta('ID', 'ACNNNGTNA'),
-                     sequences.Fasta('ID', 'ANNCGTNNAAAAA')]
-
-        correct_coords = [[intervals.Interval(0,3)],
-                         [intervals.Interval(1, 4)],
-                         [intervals.Interval(2, 5)],
-                         [intervals.Interval(0, 3)],
-                         [intervals.Interval(0, 3)],
-                         [intervals.Interval(1, 1), intervals.Interval(4,6)],
-                         [intervals.Interval(0, 1), intervals.Interval(5, 6), intervals.Interval(8, 8)],
-                         [intervals.Interval(0, 0), intervals.Interval(3, 5), intervals.Interval(8, 12)]]
-
-        for i in range(len(test_seqs)):
-            gaps = test_seqs[i].contig_coords()
-            self.assertListEqual(correct_coords[i], gaps)
-
-    def test_is_all_Ns(self):
-        '''Test is_all_Ns()'''
-        self.assertTrue(sequences.Fasta('ID', 'n').is_all_Ns())
-        self.assertTrue(sequences.Fasta('ID', 'N').is_all_Ns())
-        self.assertTrue(sequences.Fasta('ID', 'nNn').is_all_Ns())
-        self.assertFalse(sequences.Fasta('ID', 'a').is_all_Ns())
-        self.assertFalse(sequences.Fasta('ID', '').is_all_Ns())
-        self.assertFalse(sequences.Fasta('ID', 'anNg').is_all_Ns())
-        self.assertFalse(sequences.Fasta('ID', 'naN').is_all_Ns())
-        self.assertFalse(sequences.Fasta('ID', 'anNg').is_all_Ns(start=0, end=0))
-        self.assertFalse(sequences.Fasta('ID', 'anNg').is_all_Ns(start=0, end=1))
-        self.assertTrue(sequences.Fasta('ID', 'anNg').is_all_Ns(start=1, end=1))
-        self.assertTrue(sequences.Fasta('ID', 'anNg').is_all_Ns(start=1, end=2))
-        self.assertFalse(sequences.Fasta('ID', 'anNg').is_all_Ns(start=1))
-        self.assertTrue(sequences.Fasta('ID', 'anN').is_all_Ns(start=1))
-        self.assertFalse(sequences.Fasta('ID', 'anNg').is_all_Ns(end=1))
-        self.assertTrue(sequences.Fasta('ID', 'nNA').is_all_Ns(end=1))
-
-        with self.assertRaises(sequences.Error):
-            sequences.Fasta('ID', 'anNg').is_all_Ns(start=1, end=0)
-
-    def test_trim_Ns(self):
-        '''trim_Ns() should do the right trimming of a sequence'''
-        fa = sequences.Fasta('ID', 'ANNANA')
-        test_seqs = [sequences.Fasta('ID', 'ANNANA'),
-                     sequences.Fasta('ID', 'NANNANA'),
-                     sequences.Fasta('ID', 'NANNANAN'),
-                     sequences.Fasta('ID', 'ANNANAN'),
-                     sequences.Fasta('ID', 'NNNNNNANNANAN'),
-                     sequences.Fasta('ID', 'NNANNANANn')]
-
-        for s in test_seqs:
-            s.trim_Ns()
-            self.assertEqual(fa, s)
-
-    def test_replace_bases(self):
-        '''Check that bases get replaced correctly'''
-        fa = sequences.Fasta('X', 'AUCGTUUACT')
-        fa.replace_bases('U', 'T')
-        self.assertEqual(fa, sequences.Fasta('X', 'ATCGTTTACT'))
-
-    def test_replace_interval(self):
-        '''Test replace_interval()'''
-        fa = sequences.Fasta('ID', 'ACGTA')
-        fa.replace_interval(0, 0, 'NEW')
-        self.assertEqual(fa, sequences.Fasta('ID', 'NEWCGTA'))
-
-        fa = sequences.Fasta('ID', 'ACGTA')
-        fa.replace_interval(4, 4, 'NEW')
-        self.assertEqual(fa, sequences.Fasta('ID', 'ACGTNEW'))
-
-        fa = sequences.Fasta('ID', 'ACGTA')
-        fa.replace_interval(2, 3, 'NEW')
-        self.assertEqual(fa, sequences.Fasta('ID', 'ACNEWA'))
-
-        fa = sequences.Fasta('ID', 'ACGTA')
-        with self.assertRaises(sequences.Error):
-            fa.replace_interval(3,2,'x')
-        with self.assertRaises(sequences.Error):
-            fa.replace_interval(1,5,'x')
-        with self.assertRaises(sequences.Error):
-            fa.replace_interval(5,10,'x')
-
-        fq = sequences.Fastq('ID', 'ACGTA', 'ABCDE')
-        fq.replace_interval(0, 0, 'NEW', 'III')
-        self.assertEqual(fq, sequences.Fastq('ID', 'NEWCGTA', 'IIIBCDE'))
-
-        fq = sequences.Fastq('ID', 'ACGTA', 'ABCDE')
-        fq.replace_interval(4, 4, 'NEW', 'III')
-        self.assertEqual(fq, sequences.Fastq('ID', 'ACGTNEW', 'ABCDIII'))
-
-        fq = sequences.Fastq('ID', 'ACGTA', 'ABCDE')
-        fq.replace_interval(2, 3, 'NEW', 'III')
-        self.assertEqual(fq, sequences.Fastq('ID', 'ACNEWA', 'ABIIIE'))
-
-        with self.assertRaises(sequences.Error):
-            fq.replace_interval(1,1,'x', 'xx')
-
-    def test_search_string(self):
-        '''Check that search_string() finds all the hits'''
-        fa = sequences.Fasta('X', 'AAA')
-        hits = fa.search('G')
-        self.assertTrue(len(hits) == 0)
-        hits = fa.search('AAA')
-        self.assertListEqual(hits, [(0, '+')])
-        hits = fa.search('AA')
-        self.assertListEqual(hits, [(0, '+'), (1, '+')])
-        hits = fa.search('TTT')
-        self.assertListEqual(hits, [(0, '-')])
-
-    def test_to_Fastq(self):
-        '''Check to_Fastq converts OK, including out of range quality scores'''
-        fa = sequences.Fasta('X', 'AAAAA')
-        quals = [-1, 0, 40, 93, 94]
-        self.assertEqual(sequences.Fastq('X', 'AAAAA', '!!I~~'), fa.to_Fastq(quals))
-        with self.assertRaises(sequences.Error):
-            fa.to_Fastq('AAAAAAAAAAAAA')
-
-
-    def test_translate(self):
-        '''Test nucleotide -> amino acid conversion works on Fasta'''
-        fa = sequences.Fasta('ID', 'GCAGCCGCGGCTAGAAGGCGACGCCGGCGTAACAATGACGATTGCTGTGAAGAGCAACAGGGAGGCGGGGGTCACCATATAATCATTTTATTGCTACTCCTGCTTAAAAAGATGTTCTTTCCACCCCCGCCTAGCAGTTCATCCTCGTCTACAACCACGACTTGGTACTATGTAGTCGTGGTTTAATAGTGA')
-        self.assertEqual(sequences.Fasta('ID', 'AAAARRRRRRNNDDCCEEQQGGGGHHIIILLLLLLKKMFFPPPPSSSSSSTTTTWYYVVVV***'), fa.translate())
-        self.assertEqual(sequences.Fasta('ID', 'QPRLEGDAGVTMTIAVKSNREAGVTI*SFYCYSCLKRCSFHPRLAVHPRLQPRLGTM*SWFNS'), fa.translate(frame=1))
-        print(fa.translate(frame=1))
-        self.assertEqual(sequences.Fasta('ID', 'SRG*KATPA*Q*RLL*RATGRRGSPYNHFIATPA*KDVLSTPA*QFILVYNHDLVLCSRGLIV'), fa.translate(frame=2))
-
-
-    def test_split_capillary_id(self):
-        '''Tests that we get information from a sanger capillary read name OK'''
-        ids = ['abcde.p1k', 'abcde.x.p1k', 'abcde.p1ka', 'abcde.q1k', 'abcde.w2k']
-        expected = [{'prefix': 'abcde', 'dir': 'fwd', 'suffix': 'p1k'},
-                    {'prefix': 'abcde.x', 'dir': 'fwd', 'suffix': 'p1k'},
-                    {'prefix': 'abcde', 'dir': 'fwd', 'suffix': 'p1ka'},
-                    {'prefix': 'abcde', 'dir': 'rev', 'suffix': 'q1k'},
-                    {'prefix': 'abcde', 'dir': 'unk', 'suffix': 'w2k'}]
-
-        for i in range(len(ids)):
-            fa = sequences.Fasta(ids[i], 'A')
-            self.assertEqual(fa.split_capillary_id(), expected[i])
-
-        with self.assertRaises(sequences.Error):
-            fa = sequences.Fasta('name', 'A')
-            fa.split_capillary_id()
-
-
-class TestEmbl(unittest.TestCase):
-    def test_get_id_from_header_line(self):
-        '''Test get id from header line of EMBL'''
-        embl = sequences.Embl('ID', 'ACGT')
-        self.assertEqual(embl._get_id_from_header_line('ID   X; blah'), 'X')
-        self.assertEqual(embl._get_id_from_header_line('LOCUS   X foo'), 'X')
-        with self.assertRaises(sequences.Error):
-            self.assertEqual(embl._get_id_from_header_line('ID X;'), 'X')
-        with self.assertRaises(sequences.Error):
-            self.assertEqual(embl._get_id_from_header_line('XX   X;'), 'X')
-
-
-    def test_get_next_from_embl_file(self):
-        f_in = utils.open_file_read(os.path.join(data_dir, 'sequences_test.embl'))
-        embl = sequences.Embl()
-        counter = 1
-
-        while embl.get_next_from_file(f_in):
-            self.assertEqual(embl, sequences.Fasta('seq' + str(counter), expected_embl[counter-1]))
-            counter += 1
-
-        utils.close(f_in)
-
-
-    def test_get_next_from_gbk_file(self):
-        f_in = utils.open_file_read(os.path.join(data_dir, 'sequences_test.gbk'))
-        embl = sequences.Embl()
-        counter = 1
-        expected = [
-            'gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaatgccatgactcagattctaattttaagctattcaatttctctttgatc',
-            'gatcctccatatacaacggtatctccacctcaggtttagatctcaacaacggaaccattgccgacatgagacagttaggtatcgtcgagagttacaagctaaaacgagcagtagtcagctctgcatctgaagccgctgaagttctactaagggtggataacatcatccgtgcaagaccaatgccatgactcagattctaattttaagctattcaatttctctttgaaa']
-
-        while embl.get_next_from_file(f_in):
-            self.assertEqual(embl, sequences.Fasta('NAME' + str(counter), expected[counter-1]))
-            counter += 1
-
-        utils.close(f_in)
- 
-
-class TestFastq(unittest.TestCase):
-    def setUp(self):
-        self.fastq = sequences.Fastq('ID', 'ACGTA', 'IIIII')
-
-    def test_init(self):
-        '''__init__ should get the ID, sequence and quality correctly'''
-        self.assertEqual(self.fastq.id, 'ID')
-        self.assertEqual(self.fastq.seq, 'ACGTA')
-        self.assertEqual(self.fastq.qual, 'IIIII')
-
-    def test_init_length_mismatch(self):
-        '''__init__ should raise an error when length of seq and quality not the same'''
-        with self.assertRaises(sequences.Error):
-            sequences.Fastq('X', 'A', 'II')
-
-    def test_get_next_from_file(self):
-        '''get_next_from_file() should read seqs from OK, and raise error at badly formatted file'''
-        bad_files = ['sequences_test_fail_no_AT.fq',
-                     'sequences_test_fail_no_seq.fq',
-                     'sequences_test_fail_no_plus.fq',
-                     'sequences_test_fail_no_qual.fq']
-
-        bad_files = [os.path.join(data_dir, x) for x in bad_files]
-
-        for fname in bad_files:
-            f_in = utils.open_file_read(fname)
-            fq = sequences.Fastq()
-            with self.assertRaises(sequences.Error):
-                while fq.get_next_from_file(f_in):
-                    pass
-
-            utils.close(f_in)
-
-        fname = os.path.join(data_dir, 'sequences_test_good_file.fq')
-        try:
-            f_in = open(fname)
-        except IOError:
-            print("Error opening '" + fname + "'", file=sys.stderr)
-            sys.exit(1)
-
-        fq = sequences.Fastq()
-        while fq.get_next_from_file(f_in):
-            self.assertEqual(fq, sequences.Fastq('ID', 'ACGTA', 'IIIII'))
-        utils.close(f_in)
-
-    def test_revcomp(self):
-        '''revcomp() should correctly reverse complement a sequence'''
-        fq = sequences.Fastq('ID', 'ACGTNacgtn', '1234567890')
-        fq.revcomp()
-        self.assertEqual(fq, sequences.Fastq('ID', 'nacgtNACGT', '0987654321'))
-
-    def test_trim_Ns(self):
-        '''trim_Ns() should do the right trimming of a fastq sequence'''
-        fq = sequences.Fastq('ID', 'ANNANA', '111111')
-        test_seqs = [sequences.Fastq('ID', 'ANNANA', '111111'),
-                     sequences.Fastq('ID', 'NANNANA', '1111111'),
-                     sequences.Fastq('ID', 'NANNANAN', '11111111'),
-                     sequences.Fastq('ID', 'ANNANAN', '1111111'),
-                     sequences.Fastq('ID', 'NNNNNNANNANAN', '1111111111111'),
-                     sequences.Fastq('ID', 'NNANNANANn', '1111111111')]
-
-        for s in test_seqs:
-            s.trim_Ns()
-            self.assertEqual(fq, s)
-
-    def test_trim(self):
-        '''trim() should trim the right number of bases off start and end'''
-        fq = sequences.Fastq('ID', '1234567890', '1234567890')
-        fq.trim(0, 0)
-        self.assertEqual(fq, sequences.Fastq('ID', '1234567890', '1234567890'))
-
-        fq = sequences.Fastq('ID', '1234567890', '1234567890')
-        fq.trim(1, 0)
-        self.assertEqual(fq, sequences.Fastq('ID', '234567890', '234567890'))
-
-        fq = sequences.Fastq('ID', '1234567890', '1234567890')
-        fq.trim(0, 1)
-        self.assertEqual(fq, sequences.Fastq('ID', '123456789', '123456789'))
-
-        fq = sequences.Fastq('ID', '1234567890', '1234567890')
-        fq.trim(2, 2)
-        self.assertEqual(fq, sequences.Fastq('ID', '345678', '345678'))
-
-    def test_to_Fasta_and_qual(self):
-        '''Check to_Fasta_and_qual converts quality scores correctly'''
-        fq = sequences.Fastq('ID', 'ACGT', '>ADI')
-        (fa, qual) = fq.to_Fasta_and_qual()
-        self.assertEqual(fa, sequences.Fasta('ID', 'ACGT'))
-        self.assertListEqual(qual, [29, 32, 35, 40])
-
-
-    def test_translate(self):
-        '''Test nucleatide -> amino acid conversion works on Fasta'''
-        fq = sequences.Fastq('ID', 'GCAGCCGCGGCTAGAAGGCGACGCCGGCGTAACAATGACGATTGCTGTGAAGAGCAACAGGGAGGCGGGGGTCACCATATAATCATTTTATTGCTACTCCTGCTTAAAAAGATGTTCTTTCCACCCCCGCCTAGCAGTTCATCCTCGTCTACAACCACGACTTGGTACTATGTAGTCGTGGTTTAATAGTGA', 'IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII')
-
-        self.assertEqual(sequences.Fastq('ID', 'AAAARRRRRRNNDDCCEEQQGGGGHHIIILLLLLLKKMFFPPPPSSSSSSTTTTWYYVVVV***', 'IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII'), fq.translate())
-
-class TestFileReader(unittest.TestCase):
-    def test_file_reader_fasta(self):
-        '''file_reader should iterate through a fasta file correctly'''
-        reader = sequences.file_reader(os.path.join(data_dir, 'sequences_test.fa'))
-        counter = 1
-        for seq in reader:
-            self.assertEqual(seq, sequences.Fasta(str(counter), 'ACGTA'))
-            counter += 1
-
-    def test_file_reader_fastq(self):
-        '''file_reader should iterate through a fastq file correctly'''
-        reader = sequences.file_reader(os.path.join(data_dir, 'sequences_test_good_file.fq'))
-        for seq in reader:
-            self.assertEqual(seq, sequences.Fastq('ID', 'ACGTA', 'IIIII'))
-
-    def test_file_reader_bad_format(self):
-        '''file_reader should die properly when not given fasta or fastq file'''
-        with self.assertRaises(sequences.Error):
-            reader = sequences.file_reader(os.path.join(data_dir, 'sequences_test_not_a_fastaq_file'))
-            for seq in reader:
-                pass
-
-    def test_file_reader_gff(self):
-        '''Test read gff file'''
-        good_files = [
-            'sequences_test_gffv3.gff',
-            'sequences_test_gffv3.no_FASTA_line.gff'
-        ]
-        good_files = [os.path.join(data_dir, x) for x in good_files]
-
-        for f in good_files:
-            reader = sequences.file_reader(f)
-            counter = 1
-            for seq in reader:
-                self.assertEqual(seq, sequences.Fasta('seq' + str(counter), 'ACGTACGTAC'))
-                counter += 1
-        
-        bad_files = [
-            'sequences_test_gffv3.no_seq.gff',
-            'sequences_test_gffv3.no_seq.2.gff'
-        ]
-        bad_files = [os.path.join(data_dir, x) for x in bad_files]
-
-        for filename in bad_files:
-            with self.assertRaises(sequences.Error):
-                reader = sequences.file_reader(filename)
-                for seq in reader:
-                    pass
-
-    def test_file_reader_embl(self):
-        '''Test read embl file'''
-        reader = sequences.file_reader(os.path.join(data_dir, 'sequences_test.embl'))
-
-        counter = 1
-        for seq in reader:
-            self.assertEqual(seq, sequences.Fasta('seq' + str(counter), expected_embl[counter-1]))
-            counter += 1
-        
-        bad_files = [
-            'sequences_test.embl.bad',
-            'sequences_test.embl.bad2',
-        ]
-        bad_files = [os.path.join(data_dir, x) for x in bad_files]
-
-        for filename in bad_files:
-            with self.assertRaises(sequences.Error):
-                reader = sequences.file_reader(filename)
-                for seq in reader:
-                    pass
-
-    def test_file_reader_phylip(self):
-        '''Test read phylip file'''
-        test_files = [
-            'sequences_test_phylip.interleaved',
-            'sequences_test_phylip.interleaved2',
-            'sequences_test_phylip.sequential'
-        ]
-
-        test_files = [os.path.join(data_dir, f) for f in test_files]
-
-        expected_seqs = [
-            sequences.Fasta('Turkey', 'AACTNGGGCATTTCAGGGTGAGCCCGGGCAATACAGGGTAT'),
-            sequences.Fasta('Salmo_gair', 'AAGCCTTGGCAGTGCAGGGTGAGCCGTGGCCGGGCACGGTAT'),
-            sequences.Fasta('H. Sapiens', 'ACCGGTTGGCCGTTCAGGGTACAGGTTGGCCGTTCAGGGTAA')
-        ]
-
-        for fname in test_files:
-            reader = sequences.file_reader(fname)
-            i = 0
-            for seq in reader:
-                self.assertEqual(expected_seqs[i], seq)
-                i += 1
-        
-        # files made by seaview are a little different in the first line.
-        # Test one of these
-        expected_seqs = [
-            sequences.Fasta('seq1', 96 * 'G' + 'T'),
-            sequences.Fasta('seq2', 94 * 'A' + 'G')
-        ]
-        
-        reader = sequences.file_reader(os.path.join(data_dir, 'sequences_test_phylip.made_by_seaview'))
-        i = 0
-        for seq in reader:
-            print(seq)
-            self.assertEqual(expected_seqs[i], seq)
-            i += 1
-
-
-if __name__ == '__main__':
-    unittest.main()
-
diff --git a/fastaq/tests/tasks_test.py b/fastaq/tests/tasks_test.py
deleted file mode 100644
index 084425e..0000000
--- a/fastaq/tests/tasks_test.py
+++ /dev/null
@@ -1,449 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-import filecmp
-import os
-import unittest
-from fastaq import tasks, sequences
-
-modules_dir = os.path.dirname(os.path.abspath(sequences.__file__))
-data_dir = os.path.join(modules_dir, 'tests', 'data')
-
-class Error (Exception): pass
-
-
-class TestCapillaryToPairs(unittest.TestCase):
-    def test_capillary_to_pairs(self):
-        '''Check that capillary reads file converted to paired and unpaired'''
-        tmp_prefix = 'tmp.cap_to_pairs'
-        tasks.capillary_to_pairs(os.path.join(data_dir, 'sequences_test_cap_to_read_pairs.fa'), tmp_prefix)
-        # sequences have been hashed, so could be in any order in
-        # output files. So need to check contents of files are OK
-        d_correct_paired = {}
-        d_correct_unpaired = {}
-        tasks.file_to_dict(os.path.join(data_dir, 'sequences_test_cap_to_read_pairs.fa.paired.gz'), d_correct_paired)
-        tasks.file_to_dict(os.path.join(data_dir, 'sequences_test_cap_to_read_pairs.fa.unpaired.gz'), d_correct_unpaired)
-        d_test_paired = {}
-        d_test_unpaired = {}
-        tasks.file_to_dict(tmp_prefix + '.paired.gz', d_test_paired)
-        tasks.file_to_dict(tmp_prefix + '.unpaired.gz', d_test_unpaired)
-        self.assertDictEqual(d_test_paired, d_correct_paired)
-        self.assertDictEqual(d_test_unpaired, d_correct_unpaired)
-        os.unlink(tmp_prefix + '.paired.gz')
-        os.unlink(tmp_prefix + '.unpaired.gz')
-
-
-class TestDeinterleave(unittest.TestCase):
-    def test_deinterleave(self):
-        '''deinterleave should deal with an interleaved file correctly'''
-        tmp_1 = 'tmp.deinterleaved_1.fa'
-        tmp_2 = 'tmp.deinterleaved_2.fa'
-        tasks.deinterleave(os.path.join(data_dir, 'sequences_test_interleaved.fa'), tmp_1, tmp_2)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_deinterleaved_1.fa'), tmp_1))
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_deinterleaved_2.fa'), tmp_2))
-
-        tasks.deinterleave(os.path.join(data_dir, 'sequences_test_interleaved.fq'), tmp_1, tmp_2, fasta_out=True)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_deinterleaved_1.fa'), tmp_1))
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_deinterleaved_2.fa'), tmp_2))
-
-        with self.assertRaises(tasks.Error):
-            tasks.deinterleave(os.path.join(data_dir, 'sequences_test_interleaved_bad.fa'), tmp_1, tmp_2)
-        os.unlink(tmp_1)
-        os.unlink(tmp_2)
-
-
-class TestEnumerateNames(unittest.TestCase):
-    def test_enumerate_names(self):
-        '''Test enomereate_names works with all options'''
-        outfile = 'tmp.enumerate_seqs.fa'
-        rename_out = outfile + '.rename'
-        tasks.enumerate_names(os.path.join(data_dir, 'sequences_test_enumerate_names.fa'), outfile)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_enumerate_names.fa.out.start.1'), outfile))
-        tasks.enumerate_names(os.path.join(data_dir, 'sequences_test_enumerate_names.fa'), outfile, rename_file=rename_out)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_enumerate_names.fa.out.start.1'), outfile))
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_enumerate_names.fa.out.start.1.rename_file'), rename_out))
-        tasks.enumerate_names(os.path.join(data_dir, 'sequences_test_enumerate_names.fa'), outfile, start_index=2)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_enumerate_names.fa.out.start.2'), outfile))
-        tasks.enumerate_names(os.path.join(data_dir, 'sequences_test_enumerate_names.fa'), outfile, keep_illumina_suffix=True)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_enumerate_names.fa.out.keep_suffix'), outfile))
-        os.unlink(outfile)
-        os.unlink(rename_out)
-
-
-class TestExtendGaps(unittest.TestCase):
-    def test_extend_gaps(self):
-        '''Test that gap extension works'''
-        outfile = 'tmp.gap_extend.fa'
-        tasks.extend_gaps(os.path.join(data_dir, 'sequences_test_extend_gaps.fa'), outfile, trim=2)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_extend_gaps.fa.out'), outfile))
-        os.unlink(outfile)
-
-
-class TestFastqToMiraXml(unittest.TestCase):
-    def test_fastaq_to_mira_xml(self):
-        '''check that fastaq_to_mira_xml makes the correct xml file from a fastq file'''
-        tmp = 'tmp.mira.xml'
-        tasks.fastaq_to_mira_xml(os.path.join(data_dir, 'sequences_test_good_file.fq'), tmp)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_good_file_mira.xml'), tmp))
-        os.unlink(tmp)
-
-
-class TestFilter(unittest.TestCase):
-    def test_length_filter(self):
-        '''Check that filtering by length works as expected'''
-        infile = os.path.join(data_dir, 'sequences_test_length_filter.fa')
-        correct_files = [os.path.join(data_dir, 'sequences_test_length_filter.min-0.max-1.fa'),
-                         os.path.join(data_dir, 'sequences_test_length_filter.min-0.max-inf.fa'),
-                         os.path.join(data_dir, 'sequences_test_length_filter.min-4.max-4.fa')]
-        cutoffs = [(0, 1), (0, float('inf')), (4, 4)]
-
-        for i in range(len(cutoffs)):
-            outfile = 'tmp.length_filter.fa'
-            tasks.filter(infile, outfile, minlength=cutoffs[i][0], maxlength=cutoffs[i][1])
-            self.assertTrue(filecmp.cmp(correct_files[i], outfile))
-            os.unlink(outfile)
-
-    def test_regex_filter(self):
-        '''Check that filtering by name regex works as expected'''
-        infile = os.path.join(data_dir, 'sequences_test_filter_by_regex.fa')
-        correct_files = [os.path.join(data_dir, 'sequences_test_filter_by_regex.numeric.fa'),
-                         os.path.join(data_dir, 'sequences_test_filter_by_regex.first-of-pair.fa'),
-                         os.path.join(data_dir, 'sequences_test_filter_by_regex.first-char-a.fa')]
-        regexes = ['^[0-9]+$', '/1$', '^a']
-
-        for i in range(len(regexes)):
-            outfile = 'tmp.regex_filter.fa'
-            tasks.filter(infile, outfile, regex=regexes[i])
-            self.assertTrue(filecmp.cmp(correct_files[i], outfile))
-            os.unlink(outfile)
-
-    def test_ids_from_file_filter(self):
-        '''Test that can extract reads from a file of read names'''
-        infile = os.path.join(data_dir, 'sequences_test_filter_by_ids_file.fa')
-        outfile = 'tmp.ids_file_filter.fa'
-        tasks.filter(infile, outfile, ids_file=infile + '.ids')
-        self.assertTrue(filecmp.cmp(infile + '.filtered', outfile))
-        os.unlink(outfile)
-
-    def test_invert_filter(self):
-        '''Test that inverting filtering works'''
-        infile = os.path.join(data_dir, 'sequences_test_filter_by_ids_file.fa')
-        outfile = 'tmp.ids_file_filter.fa'
-        tasks.filter(infile, outfile, ids_file=infile + '.ids', invert=True)
-        self.assertTrue(filecmp.cmp(infile + '.filtered.invert', outfile))
-        os.unlink(outfile)
-
-
-class TestGetSeqsFlankingGaps(unittest.TestCase):
-    def test_get_seqs_flanking_gaps(self):
-        outfile = 'tmp.seqs_flanking_gaps'
-        tasks.get_seqs_flanking_gaps(os.path.join(data_dir, 'sequences_test_get_seqs_flanking_gaps.fa'), outfile, 3, 3)
-        self.assertTrue(filecmp.cmp(outfile, os.path.join(data_dir, 'sequences_test_get_seqs_flanking_gaps.fa.out')))
-        os.unlink(outfile)
-
-
-class TestInterleave(unittest.TestCase):
-    def test_interleave(self):
-        '''Check that interleave works as expected'''
-        tmp = 'tmp.interleaved.fa'
-        tasks.interleave(os.path.join(data_dir, 'sequences_test_deinterleaved_1.fa'),
-                         os.path.join(data_dir, 'sequences_test_deinterleaved_2.fa'),
-                         tmp)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_interleaved.fa'), tmp))
-
-        with self.assertRaises(tasks.Error):
-            tasks.interleave(os.path.join(data_dir, 'sequences_test_deinterleaved_bad_1.fa'),
-                             os.path.join(data_dir, 'sequences_test_deinterleaved_bad_2.fa'),
-                             tmp)
-
-        with self.assertRaises(tasks.Error):
-            tasks.interleave(os.path.join(data_dir, 'sequences_test_deinterleaved_bad2_1.fa'),
-                             os.path.join(data_dir, 'sequences_test_deinterleaved_bad2_2.fa'),
-                             tmp)
-        os.unlink(tmp)
-
-
-class TestMakeRandomContigs(unittest.TestCase):
-    def test_make_random_contigs(self):
-        '''Test make_random_contigs()'''
-        # Can't guarantee same results from random (even using same seed), so
-        # just check sequence names and lengths
-        def files_are_equal(file1, file2):
-            seqs1 = {}
-            seqs2 = {}
-            tasks.file_to_dict(file1, seqs1)
-            tasks.file_to_dict(file2, seqs2)
-            if len(seqs1) != len(seqs2):
-                return False
-
-            for name in seqs1:
-                seq1 = seqs1[name]
-                seq2 = seqs2[name]
-                if seq1.id != seq2.id:
-                    return False
-                if len(seq1) != len(seq2):
-                    return False
-
-            return True
-
-        tmp = 'tmp.random_contigs.fa'
-        tasks.make_random_contigs(2, 3, tmp)
-        self.assertTrue(files_are_equal(os.path.join(data_dir, 'sequences_test_make_random_contigs.default.fa'), tmp))
-        tasks.make_random_contigs(2, 3, tmp, prefix='p')
-        self.assertTrue(files_are_equal(os.path.join(data_dir, 'sequences_test_make_random_contigs.prefix-p.fa'), tmp))
-        tasks.make_random_contigs(2, 3, tmp, first_number=42)
-        self.assertTrue(files_are_equal(os.path.join(data_dir, 'sequences_test_make_random_contigs.first-42.fa'), tmp))
-        tasks.make_random_contigs(28, 3, tmp, name_by_letters=True)
-        self.assertTrue(files_are_equal(os.path.join(data_dir, 'sequences_test_make_random_contigs.name-by-letters.fa'), tmp))
-        os.unlink(tmp)
-
-
-class TestReverseComplement(unittest.TestCase):
-    def test_reverse_complement(self):
-        '''reverse_complement should correctly reverse complement each seq in a file'''
-        tmp = 'tmp.revcomp.fa'
-        tasks.reverse_complement(os.path.join(data_dir, 'sequences_test.fa'), tmp)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_revcomp.fa'), tmp))
-        os.unlink(tmp)
-
-
-class TestScaffoldsToContigs(unittest.TestCase):
-    def test_scaffolds_to_contigs(self):
-        '''Test scaffolds_to_contigs'''
-        tmp = 'tmp.contigs.fa'
-        tasks.scaffolds_to_contigs(os.path.join(data_dir, 'utils_test_scaffolds.fa'), tmp)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'utils_test_scaffolds.fa.to_contigs.fa'), tmp))
-        os.unlink(tmp)
-
-    def test_scaffolds_to_contigs_number_contigs(self):
-        '''Test scaffolds_to_contigs with contig numbering'''
-        tmp = 'tmp.contigs.fa'
-        tasks.scaffolds_to_contigs(os.path.join(data_dir, 'utils_test_scaffolds.fa'), tmp, number_contigs=True)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'utils_test_scaffolds.fa.to_contigs.number_contigs.fa'), tmp))
-        os.unlink(tmp)
-
-
-class TestSearchForSeq(unittest.TestCase):
-    def test_search_for_seq(self):
-        '''Test that sequence search finds all hits'''
-        tmp = 'tmp.search.fa'
-        tasks.search_for_seq(os.path.join(data_dir, 'sequences_test_search_string.fa'), tmp, 'AGA')
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_search_string.fa.hits'), tmp))
-        os.unlink(tmp)
-
-
-class TestTranslate(unittest.TestCase):
-    def test_translate(self):
-        '''Test translate works in each frame'''
-        tmp = 'tmp.translated.fa'
-        for i in range(3):
-            tasks.translate(os.path.join(data_dir, 'sequences_test_translate.fa'), tmp, frame=i)
-            self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_translate.fa.frame' + str(i)), tmp))
-
-        os.unlink(tmp)
-
-
-class TestTrim(unittest.TestCase):
-    def test_trim(self):
-        '''trim should correctly trim each seq in a file'''
-        tmp = 'tmp.trim.fq'
-        tasks.trim(os.path.join(data_dir, 'sequences_test_untrimmed.fq'), tmp, 2, 1)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_trimmed.fq'), tmp))
-        os.unlink(tmp)
-
-
-    def test_trim_Ns_at_end(self):
-        '''Test Ns at ends of sequences trimmed OK'''
-        tmp = 'tmp.trim.fa'
-        tasks.trim_Ns_at_end(os.path.join(data_dir, 'sequences_test_trim_Ns_at_end.fa'), tmp)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_trim_Ns_at_end.fa.trimmed'), tmp))
-        os.unlink(tmp)
-
-
-class TestFileToDict(unittest.TestCase):
-    def test_file_to_dict(self):
-        '''check file_to_dict fills dictionary correctly'''
-        d_test = {}
-        d = {}
-        tasks.file_to_dict(os.path.join(data_dir, 'sequences_test.fa'), d_test)
-        for i in range(1,5):
-            d[str(i)] = sequences.Fasta(str(i),'ACGTA')
-
-        self.assertSequenceEqual(d_test.keys(),d.keys())
-        for i in range(1,5):
-            key = str(i)
-            self.assertEqual(d_test[key].id, d[key].id)
-            self.assertEqual(d_test[key].seq, d[key].seq)
-
-
-class TestLengthsFromFai(unittest.TestCase):
-    def test_lengths_from_fai(self):
-        '''Check lengths_from_fai gets the length of each seq OK'''
-        d = {}
-        lengths = {str(x):x for x in range(1,5)}
-        tasks.lengths_from_fai(os.path.join(data_dir, 'sequences_test_fai_test.fa.fai'), d)
-        self.assertSequenceEqual(d.keys(), lengths.keys())
-        for i in d:
-            self.assertEqual(int(i), d[i])
-
-
-class TestSplit(unittest.TestCase):
-    def test_split_by_base_count(self):
-        '''Check that fasta/q files get split by base count correctly'''
-        infile = os.path.join(data_dir, 'sequences_test_split_test.fa')
-        outprefix = 'tmp.sequences_test_split_test.fa.test'
-        length2files = {2: ['1','2','3','4'],
-                        3: ['1','2','3'],
-                        4: ['1', '2', '3'],
-                        6: ['1', '2']}
-        for l in length2files:
-            tasks.split_by_base_count(infile, outprefix, l)
-            for x in range(len(length2files[l])):
-                file_index = str(length2files[l][x])
-                fname = outprefix + '.' + file_index
-                self.assertTrue(filecmp.cmp(fname, infile + '.' + str(l) + '.' + file_index))
-                os.unlink(fname)
-
-        # check that limiting the number of files works
-        tasks.split_by_base_count(infile, outprefix, 6, 2)
-        for i in range(1,4):
-            test_file = outprefix + '.' + str(i)
-            self.assertTrue(filecmp.cmp(test_file, os.path.join(data_dir, 'sequences_test_split_test.fa.6.limit2.') + str(i)))
-            os.unlink(test_file)
-
-        # check big sequence not broken
-        tasks.split_by_base_count(os.path.join(data_dir, 'sequences_test_split_test.long.fa'), outprefix, 2)
-        self.assertTrue(filecmp.cmp(outprefix + '.1', os.path.join(data_dir, 'sequences_test_split_test.long.fa.2.1')))
-        self.assertTrue(filecmp.cmp(outprefix + '.2', os.path.join(data_dir, 'sequences_test_split_test.long.fa.2.2')))
-        os.unlink(outprefix + '.1')
-        os.unlink(outprefix + '.2')
-
-    def test_split_by_fixed_size(self):
-        '''Test fasta/q file split by fixed size'''
-        infile = os.path.join(data_dir, 'sequences_test_split_fixed_size.fa')
-        outprefix = 'tmp.sequences_test_split'
-        tasks.split_by_fixed_size(infile, outprefix, 4, 1)
-  
-        for i in range(1,7,1):
-            correct = os.path.join(data_dir, 'sequences_test_split_fixed_size.fa.split.' + str(i))
-            test = outprefix + '.' + str(i)
-            self.assertTrue(filecmp.cmp(test, correct))
-            os.unlink(test)
- 
-        test_coords = outprefix + '.coords'
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_split_fixed_size.fa.split.coords'), test_coords))
-        os.unlink(test_coords)
-
-    def test_split_by_fixed_size_exclude_Ns(self):
-        infile = os.path.join(data_dir, 'sequences_test_split_fixed_size.fa')
-        outprefix = 'tmp.sequences_test_split'
-        tasks.split_by_fixed_size(infile, outprefix, 4, 1, skip_if_all_Ns=True)
-  
-        for i in range(1,5,1):
-            correct = os.path.join(data_dir, 'sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.' + str(i))
-            test = outprefix + '.' + str(i)
-            self.assertTrue(filecmp.cmp(test, correct))
-            os.unlink(test)
- 
-        test_coords = outprefix + '.coords'
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_split_fixed_size.fa.split.skip_if_all_Ns.coords'), test_coords))
-        os.unlink(test_coords)
-
-class TestCountSequences(unittest.TestCase):
-    def test_count_sequences(self):
-        '''Check that count_sequences does as expected'''
-        self.assertEqual(2, tasks.count_sequences(os.path.join(data_dir, 'sequences_test_good_file.fq')))
-        self.assertEqual(4, tasks.count_sequences(os.path.join(data_dir, 'sequences_test.fa')))
-        self.assertEqual(0, tasks.count_sequences(os.path.join(data_dir, 'sequences_test_empty_file')))
-
-class TestGetIds(unittest.TestCase):
-    def test_get_ids(self):
-        '''Check that IDs extracted correctly from fasta/q file'''
-        tmpfile = 'tmp.ids'
-        tasks.get_ids(os.path.join(data_dir, 'sequences_test.fa'), tmpfile)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test.fa.ids'), tmpfile))
-        os.unlink(tmpfile)
-
-class TestFastaToFastq(unittest.TestCase):
-    def test_fasta_to_fastq(self):
-        '''Check fasta_to_fastq converts files as expected'''
-        tasks.fasta_to_fastq(os.path.join(data_dir, 'sequences_test.fa'),
-                             os.path.join(data_dir, 'sequences_test.fa.qual'),
-                             'tmp.fq')
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test.fasta_to_fastq.fq'), 'tmp.fq'))
-
-        with self.assertRaises(tasks.Error):
-            tasks.fasta_to_fastq(os.path.join(data_dir, 'sequences_test.fa'),
-                                 os.path.join(data_dir, 'sequences_test.fa.qual.bad'),
-                                 'tmp.fq')
-
-        os.unlink('tmp.fq')
-
-
-class TestReplaceBases(unittest.TestCase):
-    def test_sequences_replace_bases(self):
-        '''Check that fasta file gets all bases replaced OK'''
-        tmpfile = 'tmp.replace_bases.fa'
-        tasks.replace_bases(os.path.join(data_dir, 'sequences_test_fastaq_replace_bases.fa'), tmpfile, 'T', 'X')
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_fastaq_replace_bases.expected.fa'), tmpfile))
-        os.unlink(tmpfile)
-
-
-class TestStripIlluminaSuffix(unittest.TestCase):
-    def test_strip_illumina_suffix(self):
-        '''Check illumina suffixes stripped correctly off read names'''
-        tmpfile = 'tmp.stripped.fa'
-        tasks.strip_illumina_suffix(os.path.join(data_dir, 'sequences_test_strip_illumina_suffix.fq'), tmpfile)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_strip_illumina_suffix.fq.stripped'), tmpfile))
-        os.unlink(tmpfile)
-      
-
-class TestToQuasrPrimers(unittest.TestCase):
-    def test_to_quasr_primers(self):
-        '''Check that fasta file gets converted to QUASR sequence file'''
-        tmpfile = 'tmp.primers'
-        tasks.to_quasr_primers(os.path.join(data_dir, 'sequences_test_fastaq_to_quasr_primers.fa'), tmpfile)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_fastaq_to_quasr_primers.expected'), tmpfile))
-        os.unlink(tmpfile)
-
-
-class TestToFasta(unittest.TestCase):
-    def test_to_fasta(self):
-        '''Test to_fasta'''
-        tmpfile = 'tmp.to_fasta'
-        infiles = [
-            'sequences_test_good_file.fq',
-            'sequences_test_gffv3.gff',
-            'sequences_test_gffv3.no_FASTA_line.gff',
-            'sequences_test.embl',
-            'sequences_test.gbk',
-            'sequences_test_phylip.interleaved',
-            'sequences_test_phylip.interleaved2',
-            'sequences_test_phylip.sequential'
-        ]
-        infiles = [os.path.join(data_dir, x) for x in infiles]
-        expected_outfiles = [x + '.to_fasta' for x in infiles]
-
-        for i in range(len(infiles)):
-            tasks.to_fasta(infiles[i], tmpfile)
-            self.assertTrue(filecmp.cmp(expected_outfiles[i], tmpfile))
-
-        tasks.to_fasta(os.path.join(data_dir, 'sequences_test.fa'), tmpfile, line_length=3)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test.line_length3.fa'), tmpfile))
-        tasks.to_fasta(os.path.join(data_dir, 'sequences_test_strip_after_whitespace.fa'), tmpfile, strip_after_first_whitespace=True)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_strip_after_whitespace.fa.to_fasta'), tmpfile))
-        os.unlink(tmpfile)
-
-
-class TestToUniqueByID(unittest.TestCase):
-    def test_to_unique_by_id(self):
-        '''Test to_unique_by_id()'''
-        tmpfile = 'tmp.unique_by_id.fa'
-        tasks.to_unique_by_id(os.path.join(data_dir, 'sequences_test_to_unique_by_id.fa'), tmpfile)
-        self.assertTrue(filecmp.cmp(os.path.join(data_dir, 'sequences_test_to_unique_by_id.fa.out'), tmpfile))
-        os.unlink(tmpfile)
-
-
-if __name__ == '__main__':
-    unittest.main()
-
diff --git a/fastaq/tests/utils_test.py b/fastaq/tests/utils_test.py
deleted file mode 100644
index 731c944..0000000
--- a/fastaq/tests/utils_test.py
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env python3
-
-import sys
-import os
-import filecmp
-import unittest
-from fastaq import utils
-
-modules_dir = os.path.dirname(os.path.abspath(utils.__file__))
-data_dir = os.path.join(modules_dir, 'tests', 'data')
-
-class TestUtils(unittest.TestCase):
-    def test_write_and_read(self):
-        '''open_file_write() and open_file_read() should do the right thing depending gzipped or not'''
-        for filename in ['utils.tmp', 'utils.tmp.gz', 'utils.tmp.bgz']:
-            f = utils.open_file_write(filename)
-            for i in range(3):
-                print(i, file=f)
-            utils.close(f)
-
-            counter = 0
-
-            f = utils.open_file_read(filename)
-            for line in f:
-                self.assertEqual(counter, int(line.strip()))
-                counter += 1
-            utils.close(f)
-
-            os.unlink(filename)
-
-        f = utils.open_file_read('-')
-        self.assertEqual(sys.stdin, f)
-        f = utils.open_file_write('-')
-        self.assertEqual(sys.stdout, f)
-
-    def test_raise_exception(self):
-        '''open_file_write() and open_file_read() should raise an exception when can't do the opening'''
-        with self.assertRaises(utils.Error):
-            utils.open_file_read('this_file_is_not_here_so_throw_error')
-        with self.assertRaises(utils.Error):
-            utils.open_file_read('this_file_is_not_here_so_throw_error.gz')
-        with self.assertRaises(utils.Error):
-            utils.open_file_read(os.path.join(data_dir, 'utils_test_not_really_zipped.gz'))
-
-        with self.assertRaises(utils.Error):
-            utils.open_file_write(os.path.join('not_a_directory', 'this_file_is_not_here_so_throw_error'))
-        with self.assertRaises(utils.Error):
-            utils.open_file_write(os.path.join('not_a_directory', 'this_file_is_not_here_so_throw_error.gz'))
-
-    def test_file_transpose(self):
-        '''Test that file_transpose() does what it should'''
-        infile = os.path.join(data_dir, 'utils_test_file_transpose.txt')
-        tmp_out = 'utils_test_file_transpose.tmp'
-        correct_file = os.path.join(data_dir, 'utils_test_file_transposed.txt')
-        utils.file_transpose(infile, tmp_out)
-        self.assertTrue(filecmp.cmp(tmp_out, correct_file))
-        os.unlink(tmp_out)
-
-    def test_system_call(self):
-        '''Test that system call appears to work and die as it should'''
-        test_file = os.path.join(data_dir, 'utils_test_system_call.txt')
-        tmp_out = 'utils_test_syscall.tmp'
-        utils.syscall('cat ' + test_file + ' > ' + tmp_out)
-        self.assertTrue(filecmp.cmp(tmp_out, test_file))
-        os.unlink(tmp_out)
-
-        with self.assertRaises(utils.Error):
-            utils.syscall('thisisveryunlikelytoebarealcommandandshouldthrowerror')
-
-        utils.syscall('echo "this is not the right string" > ' + tmp_out)
-        self.assertFalse(filecmp.cmp(tmp_out, test_file))
-        os.unlink(tmp_out)
-
-        s = utils.syscall_get_stdout('echo bingo')
-        self.assertListEqual(["bingo"], s)
-        with self.assertRaises(utils.Error):
-            utils.syscall_get_stdout('thisisveryunlikelytoebarealcommandandshouldthrowerror')
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/fastaq/utils.py b/fastaq/utils.py
deleted file mode 100644
index c30273b..0000000
--- a/fastaq/utils.py
+++ /dev/null
@@ -1,86 +0,0 @@
-import os
-import sys
-import subprocess
-import shlex
-
-class Error (Exception): pass
-
-def open_file_read(filename):
-    if filename == '-':
-        f = sys.stdin
-    elif filename.endswith('.gz'):
-        # first check that the file is OK according to gunzip
-        retcode = subprocess.call('gunzip -t ' + filename, shell=True)
-        if retcode != 0:
-            raise Error("Error opening for reading gzipped file '" + filename + "'")
-
-        # now open the file
-        f = os.popen('gunzip -c ' + filename)
-    else:
-        try:
-            f = open(filename)
-        except:
-            raise Error("Error opening for reading file '" + filename + "'")
-
-    return f
-
-
-def open_file_write(filename):
-    if filename == '-':
-        f = sys.stdout
-    elif filename.endswith('.gz'):
-        if not os.path.exists(os.path.abspath(os.path.dirname(filename))):
-            raise Error("Error opening for writing gzipped file '" + filename + "'")
-
-        try:
-            f = os.popen('gzip -9 -c > ' + filename, 'w')
-        except:
-            raise Error("Error opening for writing gzipped file '" + filename + "'")
-    else:
-        try:
-            f = open(filename, 'w')
-        except:
-            raise Error("Error opening for writing file '" + filename + "'")
-
-    return f
-
-
-def close(filehandle):
-    if filehandle not in [sys.stdout, sys.stderr]:
-        filehandle.close()
-
-
-def file_transpose(f_in, f_out, sep_in=None, sep_out='\t'):
-    rows = []
-    f = open_file_read(f_in)
-    for line in f:
-        rows.append(line.rstrip().split(sep_in))
-    close(f)
-
-    columns_out = max([len(x) for x in rows])
-
-    for r in rows:
-        r += ['.'] * (columns_out - len(r))
-
-    f = open_file_write(f_out)
-    for i in range(columns_out):
-        print(sep_out.join([str(rows[x][i]) for x in range(len(rows))]), file=f)
-
-    close(f)
-
-
-def syscall(cmd):
-    retcode = subprocess.call(cmd, shell=True)
-
-    if retcode != 0:
-        raise Error("Error in system call. Command was:\n" + cmd)
-
-
-def syscall_get_stdout(cmd):
-    try:
-        out = subprocess.Popen(shlex.split(cmd), stdout=subprocess.PIPE).communicate()[0].decode('utf-8').rstrip()
-        return out.split('\n')
-    except:
-        raise Error('Error in system call. I tried to run:\n' + str(cmd))
-
-
diff --git a/scripts/fastaq_capillary_to_pairs b/src/fastaq_capillary_to_pairs
similarity index 100%
rename from scripts/fastaq_capillary_to_pairs
rename to src/fastaq_capillary_to_pairs
diff --git a/scripts/fastaq_chunker b/src/fastaq_chunker
similarity index 100%
rename from scripts/fastaq_chunker
rename to src/fastaq_chunker
diff --git a/scripts/fastaq_count_sequences b/src/fastaq_count_sequences
similarity index 100%
rename from scripts/fastaq_count_sequences
rename to src/fastaq_count_sequences
diff --git a/scripts/fastaq_deinterleave b/src/fastaq_deinterleave
similarity index 100%
rename from scripts/fastaq_deinterleave
rename to src/fastaq_deinterleave
diff --git a/scripts/fastaq_enumerate_names b/src/fastaq_enumerate_names
similarity index 100%
rename from scripts/fastaq_enumerate_names
rename to src/fastaq_enumerate_names
diff --git a/scripts/fastaq_extend_gaps b/src/fastaq_extend_gaps
similarity index 100%
rename from scripts/fastaq_extend_gaps
rename to src/fastaq_extend_gaps
diff --git a/scripts/fastaq_fasta_to_fastq b/src/fastaq_fasta_to_fastq
similarity index 100%
rename from scripts/fastaq_fasta_to_fastq
rename to src/fastaq_fasta_to_fastq
diff --git a/scripts/fastaq_filter b/src/fastaq_filter
similarity index 100%
rename from scripts/fastaq_filter
rename to src/fastaq_filter
diff --git a/scripts/fastaq_get_ids b/src/fastaq_get_ids
similarity index 100%
rename from scripts/fastaq_get_ids
rename to src/fastaq_get_ids
diff --git a/scripts/fastaq_get_seq_flanking_gaps b/src/fastaq_get_seq_flanking_gaps
similarity index 100%
rename from scripts/fastaq_get_seq_flanking_gaps
rename to src/fastaq_get_seq_flanking_gaps
diff --git a/scripts/fastaq_insert_or_delete_bases b/src/fastaq_insert_or_delete_bases
similarity index 100%
rename from scripts/fastaq_insert_or_delete_bases
rename to src/fastaq_insert_or_delete_bases
diff --git a/scripts/fastaq_interleave b/src/fastaq_interleave
similarity index 100%
rename from scripts/fastaq_interleave
rename to src/fastaq_interleave
diff --git a/scripts/fastaq_make_random_contigs b/src/fastaq_make_random_contigs
similarity index 100%
rename from scripts/fastaq_make_random_contigs
rename to src/fastaq_make_random_contigs
diff --git a/scripts/fastaq_replace_bases b/src/fastaq_replace_bases
similarity index 100%
rename from scripts/fastaq_replace_bases
rename to src/fastaq_replace_bases
diff --git a/scripts/fastaq_reverse_complement b/src/fastaq_reverse_complement
similarity index 100%
rename from scripts/fastaq_reverse_complement
rename to src/fastaq_reverse_complement
diff --git a/scripts/fastaq_scaffolds_to_contigs b/src/fastaq_scaffolds_to_contigs
similarity index 100%
rename from scripts/fastaq_scaffolds_to_contigs
rename to src/fastaq_scaffolds_to_contigs
diff --git a/scripts/fastaq_search_for_seq b/src/fastaq_search_for_seq
similarity index 100%
rename from scripts/fastaq_search_for_seq
rename to src/fastaq_search_for_seq
diff --git a/scripts/fastaq_split_by_base_count b/src/fastaq_split_by_base_count
similarity index 100%
rename from scripts/fastaq_split_by_base_count
rename to src/fastaq_split_by_base_count
diff --git a/scripts/fastaq_strip_illumina_suffix b/src/fastaq_strip_illumina_suffix
similarity index 100%
rename from scripts/fastaq_strip_illumina_suffix
rename to src/fastaq_strip_illumina_suffix
diff --git a/scripts/fastaq_to_fasta b/src/fastaq_to_fasta
similarity index 100%
rename from scripts/fastaq_to_fasta
rename to src/fastaq_to_fasta
diff --git a/scripts/fastaq_to_mira_xml b/src/fastaq_to_mira_xml
similarity index 100%
rename from scripts/fastaq_to_mira_xml
rename to src/fastaq_to_mira_xml
diff --git a/scripts/fastaq_to_perfect_reads b/src/fastaq_to_perfect_reads
similarity index 100%
rename from scripts/fastaq_to_perfect_reads
rename to src/fastaq_to_perfect_reads
diff --git a/scripts/fastaq_to_quasr_primers_file b/src/fastaq_to_quasr_primers_file
similarity index 100%
rename from scripts/fastaq_to_quasr_primers_file
rename to src/fastaq_to_quasr_primers_file
diff --git a/scripts/fastaq_to_random_subset b/src/fastaq_to_random_subset
similarity index 100%
rename from scripts/fastaq_to_random_subset
rename to src/fastaq_to_random_subset
diff --git a/scripts/fastaq_to_tiling_bam b/src/fastaq_to_tiling_bam
similarity index 100%
rename from scripts/fastaq_to_tiling_bam
rename to src/fastaq_to_tiling_bam
diff --git a/scripts/fastaq_to_unique_by_id b/src/fastaq_to_unique_by_id
similarity index 100%
rename from scripts/fastaq_to_unique_by_id
rename to src/fastaq_to_unique_by_id
diff --git a/scripts/fastaq_translate b/src/fastaq_translate
similarity index 100%
rename from scripts/fastaq_translate
rename to src/fastaq_translate
diff --git a/scripts/fastaq_trim_Ns_at_end b/src/fastaq_trim_Ns_at_end
similarity index 100%
rename from scripts/fastaq_trim_Ns_at_end
rename to src/fastaq_trim_Ns_at_end
diff --git a/scripts/fastaq_trim_ends b/src/fastaq_trim_ends
similarity index 100%
rename from scripts/fastaq_trim_ends
rename to src/fastaq_trim_ends

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastaq.git



More information about the debian-med-commit mailing list