[med-svn] [python-pysam] 04/10: Imported Upstream version 0.8.4+ds
Afif Elghraoui
afif-guest at moszumanska.debian.org
Thu Nov 12 09:09:14 UTC 2015
This is an automated email from the git hooks/post-receive script.
afif-guest pushed a commit to branch master
in repository python-pysam.
commit 18e49038bca0bc7775fc8f6e3703b207815a4b91
Author: Afif Elghraoui <afif at ghraoui.name>
Date: Wed Nov 11 18:45:10 2015 -0800
Imported Upstream version 0.8.4+ds
---
README.rst | 5 +-
benchmark/AlignedSegment_bench.py | 43 +
doc/api.rst | 15 +-
doc/conf.py | 20 +-
doc/faq.rst | 26 +-
doc/glossary.rst | 34 +-
doc/release.rst | 86 +
doc/usage.rst | 22 +-
pysam/TabProxies.pxd | 94 -
pysam/__init__.py | 64 +-
pysam/calignedsegment.pxd | 94 +
pysam/calignedsegment.pyx | 2277 ++++++++++
pysam/calignmentfile.pxd | 71 +-
pysam/calignmentfile.pyx | 3595 +++++----------
pysam/cbcf.pxd | 2 +-
pysam/cbcf.pyx | 694 ++-
pysam/cfaidx.pxd | 54 +-
pysam/cfaidx.pyx | 470 +-
pysam/chtslib.pxd | 43 +-
pysam/chtslib.pyx | 97 +-
pysam/csamfile.pxd | 4 +-
pysam/csamfile.pyx | 2 +-
pysam/csamtools.pxd | 13 +-
pysam/csamtools.pyx | 63 +-
pysam/ctabix.pxd | 39 +-
pysam/ctabix.pyx | 312 +-
pysam/ctabixproxies.pxd | 59 +
pysam/{TabProxies.pyx => ctabixproxies.pyx} | 176 +-
pysam/cutils.pxd | 27 +
pysam/cutils.pyx | 214 +
pysam/cvcf.pxd | 40 -
pysam/cvcf.pyx | 152 +-
pysam/htslib_util.h | 3 -
pysam/pysam_util.c | 2 +-
pysam/tabix_util.c | 5 +-
pysam/version.py | 2 +-
requires.txt => requirements.txt | 0
save/pysam_test2.6.py | 2 +-
setup.py | 228 +-
tests/AlignedSegment_test.py | 236 +-
tests/AlignmentFile_test.py | 314 +-
tests/SamFile_test.py | 74 +-
tests/TestUtils.py | 6 +-
tests/faidx_test.py | 99 +-
tests/pysam_data/Makefile | 2 +-
tests/pysam_data/faidx_ex1.fa | 6540 +++++++++++++++++++++++++++
tests/pysam_data/{ex1.fq => faidx_ex1.fq} | 0
tests/samtools_test.py | 93 +-
tests/tabix_test.py | 46 +-
49 files changed, 12538 insertions(+), 4021 deletions(-)
diff --git a/README.rst b/README.rst
index cbe09d2..cf3c260 100644
--- a/README.rst
+++ b/README.rst
@@ -2,6 +2,9 @@
Pysam
=====
+.. image:: https://travis-ci.org/pysam-developers/pysam.svg
+ :alt: pysam build status
+
Pysam is a python module for reading and manipulating files in the
SAM/BAM format. The SAM/BAM format is a way to store efficiently large
numbers of alignments (`Li 2009`_), such as those routinely created by
@@ -15,7 +18,7 @@ The latest version is available through
type::
pip install pysam
- .
+
Pysam documentation is available through https://readthedocs.org/ from
`here <http://pysam.readthedocs.org/en/latest/>`_
diff --git a/benchmark/AlignedSegment_bench.py b/benchmark/AlignedSegment_bench.py
new file mode 100644
index 0000000..98286d0
--- /dev/null
+++ b/benchmark/AlignedSegment_bench.py
@@ -0,0 +1,43 @@
+"""Benchmarking module for AlignedSegment functionality"""
+
+import timeit
+
+iterations = 10000
+repeats = 5
+
+setup_binary_tag = """
+import pysam
+import array
+read = pysam.AlignedSegment()
+read.set_tag('FZ', array.array('H', range(1000)))
+"""
+
+setup_binary_tag_from_file = """
+import pysam
+with pysam.AlignmentFile("../tests/pysam_data/example_btag.bam", "rb") as inf:
+ read = inf.fetch().next()
+"""
+
+def test_read_binary_get_tag(read):
+ tags = read.get_tag('FZ')
+
+def test_read_and_process_binary_get_tag(read):
+ tags = sum(read.get_tag('FZ'))
+
+tests = (
+ ("test_read_binary_get_tag", "setup_binary_tag"),
+ ("test_read_binary_get_tag", "setup_binary_tag_from_file"),
+ ("test_read_and_process_binary_get_tag", "setup_binary_tag"),
+ )
+
+for repeat in range(repeats):
+ print ("# repeat=", repeat)
+ for testf, setup_name in tests:
+ setup = locals()[setup_name]
+ setup += """\nfrom __main__ import %s""" % testf
+ #try:
+ t = timeit.timeit("%s(read)" % testf, number=iterations, setup=setup)
+ #except AttributeError, msg:
+ # print msg
+ # continue
+ print ("%5.2f\t%s\t%s" % (t,testf, setup_name))
diff --git a/doc/api.rst b/doc/api.rst
index d700ac5..c756959 100644
--- a/doc/api.rst
+++ b/doc/api.rst
@@ -58,13 +58,14 @@ reads are represented as :class:`~pysam.PileupRead` objects in the
import pysam
samfile = pysam.AlignmentFile("ex1.bam", "rb" )
for pileupcolumn in samfile.pileup("chr1", 100, 120):
- print ("\ncoverage at base %s = %s" %
+ print ("\ncoverage at base %s = %s" %
(pileupcolumn.pos, pileupcolumn.n))
for pileupread in pileupcolumn.pileups:
- if not pileupread.is_del and not pileupread.is_refskip: # query position is None if is_del or is_refskip is set.
+ if not pileupread.is_del and not pileupread.is_refskip:
+ # query position is None if is_del or is_refskip is set.
print ('\tbase in read %s = %s' %
- (pileupread.alignment.query_name,
- pileupread.alignment.query_sequence[pileupread.query_position]))
+ (pileupread.alignment.query_name,
+ pileupread.alignment.query_sequence[pileupread.query_position]))
samfile.close()
@@ -199,6 +200,12 @@ Fastq files
.. autoclass:: pysam.FastqFile
:members:
+VCF files
+---------
+.. autoclass:: pysam.VariantFile
+ :members:
+.. autoclass:: pysam.VariantHeader
+ :members:
diff --git a/doc/conf.py b/doc/conf.py
index 19a4563..ede1809 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -16,21 +16,25 @@ import sys, os, glob
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
-_libdir = "../build/lib.%s-%s-%s.%s" % (os.uname()[0].lower(), os.uname()[4],sys.version_info[0], sys.version_info[1] )
-if os.path.exists( _libdir ):
- sys.path.insert(0, os.path.abspath( _libdir ) )
+_libdir = "../build/lib.%s-%s-%s.%s" % (os.uname()[0].lower(), os.uname()[4],
+ sys.version_info[0], sys.version_info[1])
+if os.path.exists(_libdir):
+ sys.path.insert(0, os.path.abspath(_libdir))
# -- General configuration -----------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
-extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo',
- 'sphinx.ext.ifconfig',
- 'sphinx.ext.intersphinx']
+extensions = ['sphinx.ext.autodoc',
+ 'sphinx.ext.autosummary',
+ 'sphinx.ext.todo',
+ 'sphinx.ext.ifconfig',
+ 'sphinx.ext.intersphinx',
+# 'numpydoc']
+ 'sphinx.ext.napoleon']
intersphinx_mapping = {'python': ('http://docs.python.org/3.2', None)}
-
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
@@ -61,6 +65,8 @@ rst_epilog = '''
'''
+autosummary_generate = True
+
# The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the
# built documents.
diff --git a/doc/faq.rst b/doc/faq.rst
index 412a647..b414305 100644
--- a/doc/faq.rst
+++ b/doc/faq.rst
@@ -38,10 +38,10 @@ The following code will cause unexpected behaviour::
samfile = pysam.AlignmentFile("pysam_ex1.bam", "rb")
iter1 = samfile.fetch("chr1")
- print iter1.next().reference_id
+ print (iter1.next().reference_id)
iter2 = samfile.fetch("chr2")
- print iter2.next().reference_id
- print iter1.next().reference_id
+ print (iter2.next().reference_id)
+ print (iter1.next().reference_id)
This will give the following output::
@@ -61,10 +61,10 @@ chr2. The correct way to work with multiple iterators is::
samfile = pysam.AlignmentFile("pysam_ex1.bam", "rb")
iter1 = samfile.fetch("chr1", all)
- print iter1.next().reference_id
+ print (iter1.next().reference_id)
iter2 = samfile.fetch("chr2")
- print iter2.next().reference_id
- print iter1.next().reference_id
+ print (iter2.next().reference_id)
+ print (iter1.next().reference_id)
Here, the output is::
@@ -100,7 +100,19 @@ in the iteration by adding the ``until_eof=True`` flag::
bf = pysam.AlignemFile(fname, "rb")
for r in bf.fetch(until_eof=True):
if r.is_unmapped:
- print "read is unmapped"
+ print ("read is unmapped")
+
+I can't call AlignmentFile.fetch on a file without index
+========================================================
+
+:meth:`~pysam.AlignmentFile.fetch` requires an index when
+iterating over a SAM/BAM file. To iterate over a file without
+index, use the ``until_eof=True`` flag::
+
+ bf = pysam.AlignmentFile(fname, "rb")
+ for r in bf.fetch(until_eof=True):
+ print (r)
+
BAM files with a large number of reference sequences is slow
============================================================
diff --git a/doc/glossary.rst b/doc/glossary.rst
index e5c69c2..f40bcfb 100644
--- a/doc/glossary.rst
+++ b/doc/glossary.rst
@@ -11,11 +11,18 @@ Glossary
``[ (0,3), (1,5), (0,2) ]`` refers to an alignment with 3 matches, 5 insertions
and another 2 matches.
- region
- A genomic region, stated relative to a reference sequence. A region consists of
- reference name ('chr1'), start (100000), and end (200000).
- 0-based coordinates. Can be expressed as a string ('chr1:10000:20000')
-
+ region
+ A genomic region, stated relative to a reference sequence. A
+ region consists of reference name ('chr1'), start (10000), and
+ end (20000). Start and end can be omitted for regions spanning
+ a whole chromosome. If end is missing, the region will span from
+ start to the end of the chromosome. Within pysam, coordinates
+ are 0-based, half-open intervals, i.e., the position 10,000 is
+ part of the interval, but 20,000 is not. An exception are
+ :term:`samtools` compatible region strings such as
+ 'chr1:10000:20000', which are closed, i.e., both positions 10,000
+ and 20,000 are part of the interval.
+
column
Reads that are aligned to a base in the :term:`reference` sequence.
@@ -85,3 +92,20 @@ Glossary
sequence will not be part of the alignment record, in contrast
to :term:`soft clipped` reads.
+ VCF
+ Variant call format
+
+ BCF
+ Binary :term:`VCF`
+
+ tabix
+ Utility in the htslib package to index :term:`bgzip` compressed
+ files.
+
+ faidx
+ Utility in the samtools package to index :term:`fasta` formatted
+ files.
+
+ bgzip
+ Utility in the htslib package to block compress genomic data
+ files.
diff --git a/doc/release.rst b/doc/release.rst
index 29d21ab..e02c818 100644
--- a/doc/release.rst
+++ b/doc/release.rst
@@ -2,6 +2,92 @@
Release notes
=============
+Release 0.8.4
+=============
+
+This release contains numerous bugfixes and a first implementation of
+a pythonic interface to VCF/BCF files. Note that this code is still
+incomplete and preliminary, but does offer a nearly complete immutable
+Pythonic interface to VCF/BCF metadata and data with reading and
+writing capability.
+
+Potential issues when upgrading from v0.8.3:
+
+* binary tags are now returned as python arrays
+* renamed several methods for pep8 compatibility, old names still retained for
+ backwards compatibility, but should be considered deprecated.
+ * gettid() is now get_tid()
+ * getrname() is now get_reference_name()
+ * parseRegion() is now parse_region()
+* faidx now returns strings and not binary strings in py3.
+* The cython components have been broken up into smaller files with
+ more specific content. This will affect users using the cython
+ interfaces.
+
+Edited list of commit log changes:
+
+* fixes AlignmentFile.check_index to return True
+* add RG/PM header tag - closes #179
+* add with_seq option to get_aligned_pairs
+* use char * inside reconsituteReferenceSequence
+* add soft clipping for get_reference_sequence
+* add get_reference_sequence
+* queryEnd now computes length from cigar string if no sequence present, closes #176
+* tolerate missing space at end of gtf files, closes #162
+* do not raise Error when receiving output on stderr
+* add docu about fetching without index, closes #170
+* FastaFile and FastxFile now return strings in python3, closes #173
+* py3 compat: relative -> absolute imports.
+* add reference_name and next_reference_name attributes to AlignedSegment
+* add function signatures to cvcf cython. Added note about other VCF code.
+* add context manager functions to FastaFile
+* add reference_name and next_reference_name attributes to AlignedSegment
+* PileupColumn also gets a reference_name attribute.
+* add context manager functions to FastaFile
+* TabixFile.header for remote files raises AttributeError, fixes #157
+* add context manager interface to TabixFile, closes #165
+* change ctypedef enum to typedef enum for cython 0.23
+* add function signatures to cvcf cython, also added note about other VCF code
+* remove exception for custom upper-case header record tags.
+* rename VALID_HEADER_FIELDS to KNOWN_HEADER_FIELDS
+* fix header record tag parsing for custom tags.
+* use cython.str in count_coverage, fixes #141
+* avoid maketrans (issues with python3)
+* refactoring: AlignedSegment now in separate module
+* do not execute remote tests if URL not available
+* fix the unmapped count, incl reads with no SQ group
+* add raw output to tags
+* added write access for binary tags
+* bugfix in call to resize
+* implemented writing of binary tags from arrays
+* implemented convert_binary_tag to use arrays
+* add special cases for reads that are unmapped or whose mates are unmapped.
+* rename TabProxies to ctabixproxies
+* remove underscores from utility functions
+* move utility methods into cutils
+* remove callback argument to fetch - closes #128
+* avoid calling close in dealloc
+* add unit tests for File object opening
+* change AlignmentFile.open to filepath_or_object
+* implement copy.copy, close #65
+* add caching of array attributes in AlignedSegment, closes #121
+* add export of Fastafile
+* remove superfluous pysam_dispatch
+* use persist option in FastqFile
+* get_tag: expose tag type if requested with `with_value_type`
+* fix to allow reading vcf record info via tabix-based vcf reader
+* add pFastqProxy and pFastqFile objects to make it possible to work with multiple fastq records per file handle, unlike FastqProxy/FastqFile.
+* release GIL around htslib IO operations
+* More work on read/write support, API improvements
+* add `phased` property on `VariantRecordSample`
+* add mutable properties to VariantRecord
+* BCF fixes and start of read/write support
+* VariantHeaderRecord objects now act like mappings for attributes.
+* add VariantHeader.alts dict from alt ID->Record.
+* Bug fix to string representation of structured header records.
+* VariantHeader is now mutable
+
+
Release 0.8.3
=============
diff --git a/doc/usage.rst b/doc/usage.rst
index f4dd4d5..e005893 100644
--- a/doc/usage.rst
+++ b/doc/usage.rst
@@ -265,6 +265,26 @@ form:
.. Calling indels works along the same lines, using the :class:`pysam.IteratorIndelCalls`
.. and :class:`pysam.IteratorIndelCaller`.
+
+====================================
+Working with VCF/BCF formatted files
+====================================
+
+To iterate through a VCF/BCF formatted file use
+:class:`~pysam.VariantFile`::
+
+ from pysam import VariantFile
+
+ bcf_in = VariantFile("test.bcf") # auto-detect input format
+ bcf_out = VariantFile('-', 'w', header=bcf_in.header)
+
+ for rec in bcf_in.fetch('chr1', 100000, 200000):
+ bcf_out.write(rec)
+
+.. note::
+
+ The VCF/BCF API is preliminary and incomplete.
+
===============
Extending pysam
===============
@@ -292,7 +312,7 @@ flagstat command and consists of three files:
2. The cython implementation :file:`_pysam_flagstat.pyx`. This script
imports the pysam API via::
- from pysam.calignmentfile cimport AlignementFile, AlignedSegment
+ from pysam.calignmentfile cimport AlignmentFile, AlignedSegment
This statement imports, amongst others, :class:`AlignedSegment`
into the namespace. Speed can be gained from declaring
diff --git a/pysam/TabProxies.pxd b/pysam/TabProxies.pxd
deleted file mode 100644
index 22211eb..0000000
--- a/pysam/TabProxies.pxd
+++ /dev/null
@@ -1,94 +0,0 @@
-cdef extern from "stdlib.h":
- void free(void *)
- void *malloc(size_t)
- void *calloc(size_t,size_t)
- void *realloc(void *,size_t)
- int c_abs "abs" (int)
- int c_abs "abs" (int)
- int atoi( char *nptr)
- long atol( char *nptr)
- double atof( char *nptr)
-
-cdef extern from "Python.h":
- ctypedef struct FILE
- char *fgets(char *str, int size, FILE *ifile)
- int feof(FILE *stream)
- size_t strlen(char *s)
- size_t getline(char **lineptr, size_t *n, FILE *stream)
- char *strstr(char *, char *)
- char *strchr(char *string, int c)
- int fileno(FILE *stream)
-
-cdef extern from "string.h":
- int strcmp(char *s1, char *s2)
- int strncmp(char *s1,char *s2,size_t len)
- char *strcpy(char *dest,char *src)
- char *strncpy(char *dest,char *src, size_t len)
- char *strdup(char *)
- char *strcat(char *,char *)
- size_t strlen(char *s)
- int memcmp( void * s1, void *s2, size_t len )
- void *memcpy(void *dest, void *src, size_t n)
- void *memchr(void *s, int c, size_t n)
-
-cdef extern from "stdint.h":
- ctypedef int int64_t
- ctypedef int int32_t
- ctypedef int uint32_t
- ctypedef int uint8_t
- ctypedef int uint64_t
-
-cdef class TupleProxy:
-
- cdef:
- char * data
- char ** fields
- int nfields
- int index
- int nbytes
- int offset
- bint is_modified
-
- cdef encoding
-
- cdef int getMaxFields(self)
- cdef int getMinFields(self)
-# cdef char * _getindex(self, int idx)
-
- cdef take(self, char * buffer, size_t nbytes)
- cdef present(self, char * buffer, size_t nbytes)
- cdef copy(self, char * buffer, size_t nbytes)
- cdef update(self, char * buffer, size_t nbytes)
-
-cdef class GTFProxy(TupleProxy) :
-
- cdef:
- char * _attributes
- cdef bint hasOwnAttributes
-
- cdef int getMaxFields(self)
- cdef int getMinFields(self)
- cdef char * getAttributes( self )
-
-cdef class NamedTupleProxy(TupleProxy) :
- pass
-
-cdef class BedProxy(NamedTupleProxy) :
-
- cdef:
- char * contig
- uint32_t start
- uint32_t end
- int bedfields
-
- cdef int getMaxFields(self)
- cdef int getMinFields(self)
- cdef update(self, char * buffer, size_t nbytes)
-
-cdef class VCFProxy(NamedTupleProxy) :
-
- cdef:
- char * contig
- uint32_t pos
-
- cdef update( self, char * buffer, size_t nbytes )
diff --git a/pysam/__init__.py b/pysam/__init__.py
index efe39fd..32f8cfd 100644
--- a/pysam/__init__.py
+++ b/pysam/__init__.py
@@ -1,13 +1,18 @@
from pysam.libchtslib import *
+from pysam.cutils import *
+import pysam.cutils as cutils
+
+import pysam.cfaidx as cfaidx
+from pysam.cfaidx import *
import pysam.ctabix as ctabix
from pysam.ctabix import *
import pysam.csamfile as csamfile
from pysam.csamfile import *
import pysam.calignmentfile as calignmentfile
from pysam.calignmentfile import *
-import pysam.cfaidx as cfaidx
-from pysam.cfaidx import *
+import pysam.calignedsegment as calignedsegment
+from pysam.calignedsegment import *
import pysam.cvcf as cvcf
from pysam.cvcf import *
import pysam.cbcf as cbcf
@@ -30,14 +35,13 @@ class SamtoolsError(Exception):
class SamtoolsDispatcher(object):
- '''samtools dispatcher.
-
- Emulates the samtools command line as module calls.
+ '''The samtools dispatcher emulates the samtools command line as
+ module calls.
Captures stdout and stderr.
- Raises a :class:`pysam.SamtoolsError` exception in case
- samtools exits with an error code other than 0.
+ Raises a :class:`pysam.SamtoolsError` exception in case samtools
+ exits with an error code other than 0.
Some command line options are associated with parsers. For
example, the samtools command "pileup -c" creates a tab-separated
@@ -46,8 +50,8 @@ class SamtoolsDispatcher(object):
will be processed in order checking for the presence of each
option.
- If no parser is given or no appropriate parser is found, the
- stdout output of samtools commands will be returned.
+ If no parser is given or no appropriate parser is found, the stdout
+ output of samtools commands will be returned.
'''
@@ -60,7 +64,11 @@ class SamtoolsDispatcher(object):
self.stderr = []
def __call__(self, *args, **kwargs):
- '''execute a samtools command
+ '''execute a samtools command.
+
+ Keyword arguments:
+ catch_stdout -- redirect stdout from the samtools command and return as variable (default True)
+ raw -- ignore any parsers associated with this samtools command.
'''
retval, stderr, stdout = csamtools._samtools_dispatch(
self.dispatch, args, catch_stdout=kwargs.get("catch_stdout", True))
@@ -69,19 +77,23 @@ class SamtoolsDispatcher(object):
raise SamtoolsError(
'csamtools returned with error %i: %s' %
(retval, "\n".join(stderr)))
+
self.stderr = stderr
- # samtools commands do not propagate the return code correctly.
- # I have thus added this patch to throw if there is output on stderr.
- # Note that there is sometimes output on stderr that is not an error,
- # for example: [sam_header_read2] 2 sequences loaded.
- # Ignore messages like these
- stderr = [x for x in stderr
- if not (x.startswith("[sam_header_read2]") or
- x.startswith("[bam_index_load]") or
- x.startswith("[bam_sort_core]") or
- x.startswith("[samopen] SAM header is present"))]
- if stderr:
- raise SamtoolsError("\n".join(stderr))
+
+ # Commented out for samtools 1.2
+ # # samtools commands do not propagate the return code correctly.
+ # # I have thus added this patch to throw if there is output on stderr.
+ # # Note that there is sometimes output on stderr that is not an error,
+ # # for example: [sam_header_read2] 2 sequences loaded.
+ # # Ignore messages like these
+ # stderr = [x for x in stderr
+ # if not (x.startswith("[sam_header_read2]") or
+ # x.startswith("[bam_index_load]") or
+ # x.startswith("[bam_sort_core]") or
+ # x.startswith("[samopen] SAM header is present"))]
+
+ # if stderr:
+ # raise SamtoolsError("\n".join(stderr))
# call parser for stdout:
if not kwargs.get("raw") and stdout and self.parsers:
@@ -144,12 +156,14 @@ for key, options in SAMTOOLS_DISPATCH.items():
# hack to export all the symbols from separate modules
__all__ = \
- libchtslib.__all__ + \
- ctabix.__all__ + \
+ libchtslib.__all__ +\
+ cutils.__all__ +\
+ ctabix.__all__ +\
cvcf.__all__ +\
cbcf.__all__ +\
cfaidx.__all__ +\
calignmentfile.__all__ +\
+ calignedsegment.__all__ +\
csamfile.__all__ +\
["SamtoolsError", "SamtoolsDispatcher"] +\
list(SAMTOOLS_DISPATCH) +\
@@ -179,7 +193,7 @@ def get_libraries():
dirname = os.path.abspath(os.path.join(os.path.dirname(__file__)))
return [os.path.join(dirname, x) for x in (
'libchtslib.so',
- 'TabProxies.so',
+ 'ctabixproxies.so',
'cfaidx.so',
'csamfile.so',
'cvcf.so',
diff --git a/pysam/calignedsegment.pxd b/pysam/calignedsegment.pxd
new file mode 100644
index 0000000..ce82d88
--- /dev/null
+++ b/pysam/calignedsegment.pxd
@@ -0,0 +1,94 @@
+from pysam.chtslib cimport *
+
+cdef extern from "htslib_util.h":
+
+ # add *nbytes* into the variable length data of *src* at *pos*
+ bam1_t * pysam_bam_update(bam1_t * b,
+ size_t nbytes_old,
+ size_t nbytes_new,
+ uint8_t * pos)
+
+ # now: static
+ int aux_type2size(int)
+
+ char * pysam_bam_get_qname(bam1_t * b)
+ uint32_t * pysam_bam_get_cigar(bam1_t * b)
+ uint8_t * pysam_bam_get_seq(bam1_t * b)
+ uint8_t * pysam_bam_get_qual(bam1_t * b)
+ uint8_t * pysam_bam_get_aux(bam1_t * b)
+ int pysam_bam_get_l_aux(bam1_t * b)
+ char pysam_bam_seqi(uint8_t * s, int i)
+
+ uint16_t pysam_get_bin(bam1_t * b)
+ uint8_t pysam_get_qual(bam1_t * b)
+ uint8_t pysam_get_l_qname(bam1_t * b)
+ uint16_t pysam_get_flag(bam1_t * b)
+ uint16_t pysam_get_n_cigar(bam1_t * b)
+ void pysam_set_bin(bam1_t * b, uint16_t v)
+ void pysam_set_qual(bam1_t * b, uint8_t v)
+ void pysam_set_l_qname(bam1_t * b, uint8_t v)
+ void pysam_set_flag(bam1_t * b, uint16_t v)
+ void pysam_set_n_cigar(bam1_t * b, uint16_t v)
+ void pysam_update_flag(bam1_t * b, uint16_t v, uint16_t flag)
+
+
+from pysam.calignmentfile cimport AlignmentFile
+ctypedef AlignmentFile AlignmentFile_t
+
+cdef bytes TagToString(tuple tagtup)
+
+# Note: need to declare all C fields and methods here
+cdef class AlignedSegment:
+
+ # object that this AlignedSegment represents
+ cdef bam1_t * _delegate
+
+ # the file from which this AlignedSegment originates (can be None)
+ cdef AlignmentFile _alignment_file
+
+ # caching of array properties for quick access
+ cdef object cache_query_qualities
+ cdef object cache_query_alignment_qualities
+ cdef object cache_query_sequence
+ cdef object cache_query_alignment_sequence
+
+ # add an alignment tag with value to the AlignedSegment
+ # an existing tag of the same name will be replaced.
+ cpdef set_tag(self, tag, value, value_type=?, replace=?)
+
+ # return the value of an alignment tag of the AlignedSegment;
+ # the tag's type is also exposed if requested via with_value_type.
+ cpdef get_tag(self, tag, with_value_type=?)
+
+ # return true if tag exists
+ cpdef has_tag(self, tag)
+
+ # returns a valid sam alignment string
+ cpdef bytes tostring(self, AlignmentFile_t handle)
+
+ # returns the aux tag fields as a string.
+ cdef bytes get_tag_string(self)
+
+
+cdef class PileupColumn:
+ cdef bam_pileup1_t ** plp
+ cdef int tid
+ cdef int pos
+ cdef int n_pu
+ cdef AlignmentFile _alignment_file
+
+
+cdef class PileupRead:
+ cdef AlignedSegment _alignment
+ cdef int32_t _qpos
+ cdef int _indel
+ cdef int _level
+ cdef uint32_t _is_del
+ cdef uint32_t _is_head
+ cdef uint32_t _is_tail
+ cdef uint32_t _is_refskip
+
+# factory methods
+cdef makeAlignedSegment(bam1_t * src, AlignmentFile alignment_file)
+cdef makePileupColumn(bam_pileup1_t ** plp, int tid, int pos, int n_pu, AlignmentFile alignment_file)
+cdef inline makePileupRead(bam_pileup1_t * src, AlignmentFile alignment_file)
diff --git a/pysam/calignedsegment.pyx b/pysam/calignedsegment.pyx
new file mode 100644
index 0000000..f2b07a1
--- /dev/null
+++ b/pysam/calignedsegment.pyx
@@ -0,0 +1,2277 @@
+# cython: embedsignature=True
+# cython: profile=True
+###############################################################################
+###############################################################################
+# Cython wrapper for SAM/BAM/CRAM files based on htslib
+###############################################################################
+# The principal classes defined in this module are:
+#
+# class AlignedSegment an aligned segment (read)
+#
+# class PileupColumn a collection of segments (PileupRead) aligned to
+# a particular genomic position.
+#
+# class PileupRead an AlignedSegment aligned to a particular genomic
+# position. Contains additional attributes with respect
+# to this.
+#
+# Additionally this module defines numerous additional classes that are part
+# of the internal API. These are:
+#
+# Various iterator classes to iterate over alignments in sequential (IteratorRow)
+# or in a stacked fashion (IteratorColumn):
+#
+# class IteratorRow
+# class IteratorRowRegion
+# class IteratorRowHead
+# class IteratorRowAll
+# class IteratorRowAllRefs
+# class IteratorRowSelection
+#
+###############################################################################
+#
+# The MIT License
+#
+# Copyright (c) 2015 Andreas Heger
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+#
+###############################################################################
+import re
+import array
+import ctypes
+import struct
+
+cimport cython
+from cpython cimport array as c_array
+from cpython.version cimport PY_MAJOR_VERSION
+from cpython cimport PyErr_SetString, PyBytes_FromStringAndSize
+from libc.string cimport strchr
+
+from pysam.cutils cimport force_bytes, force_str, charptr_to_str
+from pysam.cutils cimport qualities_to_qualitystring, qualitystring_to_array, \
+ array_to_qualitystring
+
+# Constants for binary tag conversion
+cdef char * htslib_types = 'cCsSiIf'
+cdef char * parray_types = 'bBhHiIf'
+
+# translation tables
+
+# cigar code to character and vice versa
+cdef char* CODE2CIGAR= "MIDNSHP=X"
+
+if PY_MAJOR_VERSION >= 3:
+ CIGAR2CODE = dict([y, x] for x, y in enumerate(CODE2CIGAR))
+else:
+ CIGAR2CODE = dict([ord(y), x] for x, y in enumerate(CODE2CIGAR))
+
+# matches one CIGAR operation: a run length followed by the operation
+# character. Raw string so that the regex escape ``\d`` is not treated as
+# a (deprecated) string escape.
+CIGAR_REGEX = re.compile(r"(\d+)([MIDNSHP=X])")
+
+#####################################################################
+# typecode guessing
+cdef inline char map_typecode_htslib_to_python(uint8_t s) except 0:
+    """map an htslib typecode to the corresponding python typecode
+    to be used in the struct or array modules.
+
+    The two tables ``htslib_types`` and ``parray_types`` are parallel:
+    the character at the same index in the other table is returned.
+
+    Raises ValueError if *s* is not listed in ``htslib_types``. The
+    ``except 0`` clause is required for the exception to reach the
+    caller; 0 is never a valid return value.
+    """
+    cdef char * f = NULL
+
+    # strchr() also matches the terminating NUL of the table, which would
+    # yield the NUL of the other table: treat 0 as unknown explicitly.
+    if s != 0:
+        f = strchr(htslib_types, s)
+    if f == NULL:
+        raise ValueError("unknown htslib tag typecode '%s'" % chr(s))
+    return parray_types[f - htslib_types]
+
+cdef inline uint8_t map_typecode_python_to_htslib(char s) except 0:
+    """determine the htslib value type from the type code of a python array.
+
+    Inverse of the htslib-to-python mapping: looks *s* up in
+    ``parray_types`` and returns the character at the same index in
+    ``htslib_types``.
+
+    Raises ValueError if *s* is not listed in ``parray_types``. The
+    ``except 0`` clause is required for the exception to reach the
+    caller; 0 is never a valid return value.
+    """
+    cdef char * f = NULL
+
+    # strchr() also matches the terminating NUL of the table: treat 0 as
+    # unknown explicitly.
+    if s != 0:
+        f = strchr(parray_types, s)
+    if f == NULL:
+        # show the offending type code as a character, not as its ordinal
+        raise ValueError(
+            "unknown conversion for array typecode '%s'" % chr(<uint8_t>s))
+    return htslib_types[f - parray_types]
+
+# optional tag data manipulation
+cdef convert_binary_tag(uint8_t * tag):
+ """return bytesize, number of values and array of values
+ in aux_data memory location pointed to by tag.
+
+ *tag* is read as: one byte with the element type code, a 32-bit
+ element count, then the packed element values. The values are copied
+ into a new python array whose typecode is derived from the element
+ type code.
+ """
+ cdef uint8_t auxtype
+ cdef uint8_t byte_size
+ cdef int32_t nvalues
+ # get byte size
+ auxtype = tag[0]
+ byte_size = aux_type2size(auxtype)
+ tag += 1
+ # get number of values in array
+ # NOTE(review): the count is read through a signed 32-bit pointer cast;
+ # assumes the count fits in int32 -- confirm against the BAM spec
+ nvalues = (<int32_t*>tag)[0]
+ tag += 4
+
+ # define python array
+ cdef c_array.array c_values = array.array(
+ chr(map_typecode_htslib_to_python(auxtype)))
+ c_array.resize(c_values, nvalues)
+
+ # copy data
+ memcpy(c_values.data.as_voidptr, <uint8_t*>tag, nvalues * byte_size)
+
+ # no need to check for endian-ness as bam1_core_t fields
+ # and aux_data are in host endian-ness. See sam.c and calls
+ # to swap_data
+ return byte_size, nvalues, c_values
+
+
+cdef bytes TagToString(tuple tagtup):
+    """Format one optional field as SAM text (``TAG:TYPE:VALUE``).
+
+    Parameters
+    ----------
+    tagtup : tuple
+        ``(tag, value, value_type)``. get_tag_string() passes the tuples
+        returned by ``get_tags(with_value_type=True)``. The type code may
+        be given as str, bytes or as an integer character code.
+
+    Returns
+    -------
+    bytes
+        The formatted field without any separators around it.
+
+    Notes
+    -----
+    * All integer types are written with the SAM type ``i`` and all
+      floating point types with ``f``.
+    * Arrays (``B``) are written as ``B:<subtype>,v1,v2,...``. For
+      ``array.array`` values the subtype width follows the item size; for
+      plain sequences of integers the 32-bit subtypes are used. The
+      signed subtype is chosen only if a negative value is present.
+    * An unrecognised type code is not an error: the field is formatted
+      from the values as provided.
+    """
+    tag = force_str(tagtup[0])
+    value = tagtup[1]
+    value_type = tagtup[2]
+
+    # normalise the type code to a native one-character string
+    if isinstance(value_type, int):
+        value_type = chr(value_type)
+    else:
+        value_type = force_str(value_type)
+
+    if value_type in ('c', 'C', 'i', 'I', 's', 'S'):
+        ret = "%s:i:%s" % (tag, value)
+    elif value_type in ('f', 'F', 'd', 'D'):
+        ret = "%s:f:%s" % (tag, value)
+    elif value_type == 'Z':
+        ret = "%s:Z:%s" % (tag, force_str(value))
+    elif value_type == 'B':
+        values = list(value)
+        if isinstance(value, array.array):
+            is_float = value.typecode in ('f', 'd')
+            # width of one element in bytes
+            size = value.itemsize
+        else:
+            is_float = len(values) > 0 and isinstance(values[0], float)
+            # plain sequences carry no element width: use the 32-bit
+            # types to accommodate any integer permitted in an array
+            size = 4
+
+        if is_float:
+            subtype = 'f'
+        else:
+            signed = len(values) > 0 and min(values) < 0
+            if size == 1:
+                subtype = 'c' if signed else 'C'
+            elif size == 2:
+                subtype = 's' if signed else 'S'
+            else:
+                subtype = 'i' if signed else 'I'
+        # the subtype letter is followed by a comma before the first value
+        ret = "%s:B:%s" % (
+            tag, ",".join([subtype] + [str(v) for v in values]))
+    elif value_type == 'H':
+        # NOTE(review): kept as in the original; assumes *value* iterates
+        # over integers and does not zero-pad single-digit bytes -- confirm
+        # against what get_tags() returns for 'H' tags.
+        ret = "%s:H:%s" % (tag, "".join([hex(i)[2:] for i in value]))
+    elif value_type == 'A':
+        ret = "%s:A:%s" % (tag, force_str(value))
+    else:
+        # Unrecognized type code - format the field as it was provided.
+        ret = "%s:%s:%s" % (tag, value_type, value)
+    return force_bytes(ret)
+
+
+cdef inline uint8_t get_value_code(value, value_type=None):
+    '''guess type code for a *value*. If *value_type* is None,
+    the type code will be inferred based on the Python type of
+    *value*.
+
+    Returns the type code as an integer character code, or 0 if no
+    type code could be determined: *value* is of an unsupported Python
+    type, or *value_type* is not exactly one of ``Z``, ``i``, ``d``,
+    ``f``. *value_type* may be given as str or bytes.
+    '''
+    cdef uint8_t typecode
+    cdef char * _char_type
+
+    if value_type is None:
+        if isinstance(value, int):
+            typecode = 'i'
+        elif isinstance(value, float):
+            typecode = 'd'
+        elif isinstance(value, str):
+            typecode = 'Z'
+        elif isinstance(value, bytes):
+            typecode = 'Z'
+        elif isinstance(value, array.array) or \
+                isinstance(value, list) or \
+                isinstance(value, tuple):
+            typecode = 'B'
+        else:
+            return 0
+    else:
+        # convert first so that str and bytes type codes are treated alike,
+        # then require exactly one character: a substring test alone would
+        # also accept '' or 'id'.
+        value_type = force_bytes(value_type)
+        if len(value_type) != 1 or value_type not in b'Zidf':
+            return 0
+        _char_type = value_type
+        typecode = (<uint8_t*>_char_type)[0]
+
+    return typecode
+
+
+cdef inline getTypecode(value, maximum_value=None):
+    '''return the typecode (as bytes) to use for storing *value*.
+
+    If *maximum_value* is given, *value* is taken as the minimum of a
+    range and the typecode returned is appropriate for the whole range.
+
+    Floats map to ``f``, integers to the narrowest of ``c``/``s``/``i``
+    (negative minimum) or ``C``/``S``/``I`` (non-negative minimum), any
+    other value to ``A`` (length 1) or ``Z``.
+
+    Raises ValueError if an integer does not fit into 32 bits.
+    '''
+    upper = value if maximum_value is None else maximum_value
+    kind = type(value)
+
+    if kind is float:
+        return b'f'
+
+    if kind is not int:
+        # Note: hex strings (H) are not supported yet
+        text = value if kind is bytes else value.encode('ascii')
+        return b"A" if len(text) == 1 else b'Z'
+
+    if value < 0:
+        # signed ints
+        if value >= -128 and upper < 128:
+            return b'c'
+        if value >= -32768 and upper < 32768:
+            return b's'
+        if value < -2147483648 or upper >= 2147483648:
+            raise ValueError(
+                "at least one signed integer out of range of "
+                "BAM/SAM specification")
+        return b'i'
+
+    # unsigned ints
+    if upper < 256:
+        return b'C'
+    if upper < 65536:
+        return b'S'
+    if upper >= 4294967296:
+        raise ValueError(
+            "at least one integer out of range of BAM/SAM specification")
+    return b'I'
+
+
+cdef inline packTags(tags):
+    """pack a list of tags.
+
+    Each tag is a tuple ``(tag, value)`` or ``(tag, value, valuetype)``.
+    *value* may be a scalar, a tuple/list or an ``array.array``.
+
+    Values are packed into the most space efficient data structure
+    possible unless the tag contains a third field with the typecode.
+    For tuples and lists the typecode is derived from the smallest and
+    the largest value. The typecode may be given as str or bytes.
+
+    Returns a format string and the associated list of arguments
+    to be used in a call to struct.pack_into.
+
+    Raises ValueError for a tag tuple of the wrong length or an array
+    value type that is not supported.
+    """
+    fmts, args = ["<"], []
+
+    datatype2format = {
+        'c': ('b', 1),
+        'C': ('B', 1),
+        's': ('h', 2),
+        'S': ('H', 2),
+        'i': ('i', 4),
+        'I': ('I', 4),
+        'f': ('f', 4),
+        'A': ('c', 1)}
+
+    for tag in tags:
+
+        if len(tag) == 2:
+            pytag, value = tag
+            valuetype = None
+        elif len(tag) == 3:
+            pytag, value, valuetype = tag
+        else:
+            raise ValueError("malformatted tag: %s" % str(tag))
+
+        if not type(pytag) is bytes:
+            pytag = pytag.encode('ascii')
+
+        t = type(value)
+
+        if t is tuple or t is list:
+            # binary tags from tuples or lists
+            if valuetype is None:
+                # automatically determine value type from the range of
+                # values. If there is a mix of types, the result is
+                # undefined.
+                valuetype = getTypecode(min(value), max(value))
+
+            # the table is keyed by native strings, the struct module
+            # needs bytes: derive both forms from whatever was supplied
+            typekey = force_str(valuetype)
+            if typekey not in datatype2format:
+                raise ValueError("invalid value type '%s'" % valuetype)
+
+            datafmt = "2sccI%i%s" % (len(value), datatype2format[typekey][0])
+            args.extend([pytag[:2],
+                         b"B",
+                         force_bytes(valuetype),
+                         len(value)] + list(value))
+
+        elif isinstance(value, array.array):
+            # binary tags from arrays
+            if valuetype is None:
+                valuetype = chr(map_typecode_python_to_htslib(ord(value.typecode)))
+
+            typekey = force_str(valuetype)
+            if typekey not in datatype2format:
+                raise ValueError("invalid value type '%s'" % valuetype)
+
+            # retrieve the byte representation of the array and save as
+            # bytes. array.tostring() no longer exists in recent Python 3
+            # versions, array.tobytes() does not exist in Python 2.
+            if hasattr(value, "tobytes"):
+                packed = value.tobytes()
+            else:
+                packed = value.tostring()
+            datafmt = "2sccI%is" % (len(value) * datatype2format[typekey][1])
+            args.extend([pytag[:2],
+                         b"B",
+                         force_bytes(valuetype),
+                         len(value),
+                         packed])
+
+        else:
+            if valuetype is None:
+                valuetype = getTypecode(value)
+
+            typekey = force_str(valuetype)
+            if typekey == "Z":
+                # strings are stored with a terminating \0
+                value = force_bytes(value)
+                datafmt = "2sc%is" % (len(value)+1)
+            else:
+                # an unsupported scalar type raises KeyError here, as before
+                datafmt = "2sc%s" % datatype2format[typekey][0]
+                if typekey == "A":
+                    # struct format 'c' requires a bytes object of length 1
+                    value = force_bytes(value)
+
+            args.extend([pytag[:2],
+                         force_bytes(valuetype),
+                         value])
+
+        fmts.append(datafmt)
+
+    return "".join(fmts), args
+
+
+cdef inline int32_t calculateQueryLength(bam1_t * src):
+ """return query length computed from CIGAR alignment.
+
+ The lengths of all operations of type match (M), insertion (I),
+ soft clip (S), sequence match (=) and sequence mismatch (X) are
+ summed up; all other operations are ignored.
+
+ Return 0 if there is no CIGAR alignment.
+ """
+
+ cdef uint32_t * cigar_p = pysam_bam_get_cigar(src)
+
+ if cigar_p == NULL:
+ return 0
+
+ cdef uint32_t k, qpos
+ cdef int op
+ qpos = 0
+
+ for k from 0 <= k < pysam_get_n_cigar(src):
+ # each cigar entry packs the operation and its length
+ op = cigar_p[k] & BAM_CIGAR_MASK
+
+ if op == BAM_CMATCH or op == BAM_CINS or \
+ op == BAM_CSOFT_CLIP or \
+ op == BAM_CEQUAL or op == BAM_CDIFF:
+ qpos += cigar_p[k] >> BAM_CIGAR_SHIFT
+
+ return qpos
+
+
+cdef inline int32_t getQueryStart(bam1_t *src) except -1:
+ # Return the offset of the first query base that is not soft clipped:
+ # the summed length of the soft clips at the start of the CIGAR
+ # (0 if there is no CIGAR). Leading hard clips are skipped.
+ # Sets ValueError and returns -1 if a hard clip follows soft clipped
+ # bases, unless the clipped length equals the sequence length.
+ cdef uint32_t * cigar_p
+ cdef uint32_t k, op
+ cdef uint32_t start_offset = 0
+
+ if pysam_get_n_cigar(src):
+ cigar_p = pysam_bam_get_cigar(src);
+ for k from 0 <= k < pysam_get_n_cigar(src):
+ op = cigar_p[k] & BAM_CIGAR_MASK
+ if op == BAM_CHARD_CLIP:
+ if start_offset != 0 and start_offset != src.core.l_qseq:
+ PyErr_SetString(ValueError, 'Invalid clipping in CIGAR string')
+ return -1
+ elif op == BAM_CSOFT_CLIP:
+ start_offset += cigar_p[k] >> BAM_CIGAR_SHIFT
+ else:
+ # first operation that is not a clip: done
+ break
+
+ return start_offset
+
+
+cdef inline int32_t getQueryEnd(bam1_t *src) except -1:
+    """return the offset one past the last query base that is not soft
+    clipped.
+
+    The query length is taken from the stored sequence or, if there is
+    no sequence, computed from the CIGAR. The soft clips at the end of
+    the CIGAR are then subtracted; trailing hard clips are skipped.
+
+    Sets ValueError and returns -1 if a hard clip is found after soft
+    clipped bases have already been removed, i.e. if the hard clip is
+    not the outermost operation.
+    """
+    cdef uint32_t * cigar_p
+    cdef uint32_t k, op
+    cdef uint32_t end_offset = src.core.l_qseq
+    cdef uint32_t query_length
+
+    # if there is no sequence, compute length from cigar string
+    if end_offset == 0:
+        end_offset = calculateQueryLength(src)
+
+    # The unclipped length to compare against below. Comparing against
+    # l_qseq would reject every trailing hard clip of a read without a
+    # stored sequence, as l_qseq is 0 in that case.
+    query_length = end_offset
+
+    # walk backwards in cigar string
+    if pysam_get_n_cigar(src) > 1:
+        cigar_p = pysam_bam_get_cigar(src);
+        for k from pysam_get_n_cigar(src) > k >= 1:
+            op = cigar_p[k] & BAM_CIGAR_MASK
+            if op == BAM_CHARD_CLIP:
+                if end_offset != 0 and end_offset != query_length:
+                    PyErr_SetString(ValueError,
+                                    'Invalid clipping in CIGAR string')
+                    return -1
+            elif op == BAM_CSOFT_CLIP:
+                end_offset -= cigar_p[k] >> BAM_CIGAR_SHIFT
+            else:
+                # first operation that is not a clip: done
+                break
+
+    return end_offset
+
+
+cdef inline object getSequenceInRange(bam1_t *src,
+ uint32_t start,
+ uint32_t end):
+ """return python string of the sequence in a bam1_t object.
+
+ The bases from *start* (inclusive) to *end* (exclusive) are decoded.
+ Returns None if the record holds no sequence.
+
+ NOTE(review): *start* and *end* are not validated here; callers are
+ assumed to pass start <= end <= l_qseq -- confirm.
+ """
+
+ cdef uint8_t * p
+ cdef uint32_t k
+ cdef char * s
+
+ if not src.core.l_qseq:
+ return None
+
+ # allocate an uninitialised bytes object and fill it in place
+ seq = PyBytes_FromStringAndSize(NULL, end - start)
+ s = <char*>seq
+ p = pysam_bam_get_seq(src)
+
+ for k from start <= k < end:
+ # equivalent to seq_nt16_str[bam1_seqi(s, i)] (see bam.c)
+ # note: do not use string literal as it will be a python string
+ # two bases are packed per byte: even positions are taken from the
+ # upper four bits, odd positions from the lower four bits
+ s[k-start] = seq_nt16_str[p[k/2] >> 4 * (1 - k%2) & 0xf]
+
+ return charptr_to_str(seq)
+
+
+cdef inline object getQualitiesInRange(bam1_t *src,
+ uint32_t start,
+ uint32_t end):
+ """return python array of quality values from a bam1_t object
+
+ The values from *start* (inclusive) to *end* (exclusive) are copied
+ into a new array of unsigned chars. Returns None if the first
+ quality byte of the record is 0xff, the marker used in this module
+ for absent qualities.
+ """
+
+ cdef uint8_t * p
+ cdef uint32_t k
+
+ p = pysam_bam_get_qual(src)
+ # the marker is looked for at the first quality value of the read,
+ # not at position *start*
+ if p[0] == 0xff:
+ return None
+
+ # 'B': unsigned char
+ cdef c_array.array result = array.array('B', [0])
+ c_array.resize(result, end - start)
+
+ # copy data
+ memcpy(result.data.as_voidptr, <void*>&p[start], end - start)
+
+ return result
+
+
+#####################################################################
+## private factory methods
+cdef class AlignedSegment
+cdef makeAlignedSegment(bam1_t * src, AlignmentFile alignment_file):
+ '''return an AlignedSegment object constructed from `src`
+
+ The new object holds its own duplicate of `src` (made with bam_dup1)
+ and a reference to `alignment_file`.
+ '''
+ # note that the following does not call __init__
+ cdef AlignedSegment dest = AlignedSegment.__new__(AlignedSegment)
+ dest._delegate = bam_dup1(src)
+ dest._alignment_file = alignment_file
+ return dest
+
+
+cdef class PileupColumn
+cdef makePileupColumn(bam_pileup1_t ** plp, int tid, int pos,
+ int n_pu, AlignmentFile alignment_file):
+ '''return a PileupColumn object constructed from pileup in `plp` and
+ setting additional attributes.
+
+ The arguments are stored in the attributes of the same name; the
+ pointer `plp` is stored as is, nothing is copied.
+
+ '''
+ # note that the following does not call __init__
+ cdef PileupColumn dest = PileupColumn.__new__(PileupColumn)
+ dest._alignment_file = alignment_file
+ dest.plp = plp
+ dest.tid = tid
+ dest.pos = pos
+ dest.n_pu = n_pu
+ return dest
+
+cdef class PileupRead
+cdef inline makePileupRead(bam_pileup1_t * src, AlignmentFile alignment_file):
+ '''return a PileupRead object constructed from a bam_pileup1_t * object.
+
+ The alignment in `src.b` is wrapped in a new AlignedSegment (which
+ duplicates the record, see makeAlignedSegment); the remaining members
+ of `src` are copied into the underscore-prefixed attributes.
+ '''
+ # as in the other factory functions, __init__ is not called
+ cdef PileupRead dest = PileupRead.__new__(PileupRead)
+ dest._alignment = makeAlignedSegment(src.b, alignment_file)
+ dest._qpos = src.qpos
+ dest._indel = src.indel
+ dest._level = src.level
+ dest._is_del = src.is_del
+ dest._is_head = src.is_head
+ dest._is_tail = src.is_tail
+ dest._is_refskip = src.is_refskip
+ return dest
+
+
+# TODO: avoid string copying for getSequenceInRange, reconstituteSequenceFromMD, ...
+cdef inline object reconstituteSequenceFromMD(bam1_t * src):
+ """return reference sequence from MD tag.
+
+ The MD tag is parsed left to right: a number copies that many bases
+ from the aligned part of the read, a base following ``^`` is copied
+ from the tag unchanged, any other character is taken as a mismatch,
+ copied from the tag with 32 added to its character code (lower case
+ for upper case letters) and consumes one read base.
+
+ Returns
+ -------
+
+ None, if no MD tag is present. Otherwise a bytes object.
+
+ Raises
+ ------
+
+ ValueError, if the output buffer can not be allocated.
+ """
+
+ cdef uint8_t * md_tag_ptr = bam_aux_get(src, "MD")
+
+ if md_tag_ptr == NULL:
+ return None
+
+ cdef uint32_t start, end
+ start = getQueryStart(src)
+ end = getQueryEnd(src)
+
+ # get read sequence, taking into account soft-clipping
+ # NOTE(review): getSequenceInRange returns None for a record without
+ # sequence; the assignment to a char pointer below presumably fails in
+ # that case -- confirm.
+ r = getSequenceInRange(src, start, end)
+ cdef char * read_sequence = r
+
+ cdef char * md_tag = <char*>bam_aux2Z(md_tag_ptr)
+ cdef int md_idx = 0
+ cdef int r_idx = 0
+ cdef int nmatches = 0
+ cdef int x = 0
+ cdef int s_idx = 0
+
+ # maximum length of sequence is read length + inserts in MD tag + \0
+ cdef uint32_t max_len = end - start + strlen(md_tag) + 1
+ cdef char * s = <char*>calloc(max_len, sizeof(char))
+ if s == NULL:
+ raise ValueError(
+ "could not allocated sequence of length %i" % max_len)
+ while md_tag[md_idx] != 0:
+ # c is numerical
+ # (48..57 are the character codes of '0'..'9'); accumulate the
+ # decimal number of matching bases
+ if md_tag[md_idx] >= 48 and md_tag[md_idx] <= 57:
+ nmatches *= 10
+ nmatches += md_tag[md_idx] - 48
+ md_idx += 1
+ continue
+ else:
+ # save matches up to this point
+ for x from r_idx <= x < r_idx + nmatches:
+ s[s_idx] = read_sequence[x]
+ s_idx += 1
+ r_idx += nmatches
+ nmatches = 0
+
+ if md_tag[md_idx] == '^':
+ md_idx += 1
+ # copy the run of upper case letters (65..90 are 'A'..'Z')
+ # that follows; the read position is not advanced
+ while md_tag[md_idx] >= 65 and md_tag[md_idx] <= 90:
+ s[s_idx] = md_tag[md_idx]
+ s_idx += 1
+ md_idx += 1
+ else:
+ # convert mismatch to lower case
+ s[s_idx] = md_tag[md_idx] + 32
+ s_idx += 1
+ r_idx += 1
+ md_idx += 1
+
+ # save matches up to this point
+ for x from r_idx <= x < r_idx + nmatches:
+ s[s_idx] = read_sequence[x]
+ s_idx += 1
+
+ # copy the result into a python bytes object and release the buffer
+ seq = PyBytes_FromStringAndSize(s, s_idx)
+ free(s)
+ return seq
+
+
+cdef class AlignedSegment:
+ '''Class representing an aligned segment.
+
+ This class stores a handle to the samtools C-structure representing
+ an aligned read. Member read access is forwarded to the C-structure
+ and converted into python objects. This implementation should be fast,
+ as only the data needed is converted.
+
+ For write access, the C-structure is updated in-place. This is
+ not the most efficient way to build BAM entries, as the variable
+ length data is concatenated and thus needs to be resized if
+ a field is updated. Furthermore, the BAM entry might be
+ in an inconsistent state.
+
+ One issue to look out for is that the sequence should always
+ be set *before* the quality scores. Setting the sequence will
+ also erase any quality scores that were set previously.
+ '''
+
+ # Now only called when instances are created from Python
+ def __init__(self):
+ # Create an empty record: a zeroed bam1_t with a zeroed 40 byte data
+ # buffer, none of which is in use (l_data = 0).
+ # see bam_init1
+ self._delegate = <bam1_t*>calloc(1, sizeof(bam1_t))
+ # allocate some memory. If size is 0, calloc does not return a
+ # pointer that can be passed to free() so allocate 40 bytes
+ # for a new read
+ # NOTE(review): neither calloc result is checked for NULL
+ self._delegate.m_data = 40
+ self._delegate.data = <uint8_t *>calloc(
+ self._delegate.m_data, 1)
+ self._delegate.l_data = 0
+
+ # caching for selected fields
+ self.cache_query_qualities = None
+ self.cache_query_alignment_qualities = None
+ self.cache_query_sequence = None
+ self.cache_query_alignment_sequence = None
+
+ def __dealloc__(self):
+ # release the record held by this object
+ bam_destroy1(self._delegate)
+
+ def __str__(self):
+ """return string representation of alignment.
+
+ The representation is an approximate :term:`SAM` format, because
+ an aligned read might not be associated with a :term:`AlignmentFile`.
+ As a result :term:`tid` is shown instead of the reference name.
+ Similarly, the tags field is returned in its parsed state.
+
+ Further differences to SAM: coordinates are printed as stored in
+ the attributes, the ninth field is the query alignment length, the
+ qualities are printed as a python array and absent values as
+ ``None``.
+
+ To get a valid SAM record, use :meth:`tostring`.
+ """
+ # sam-parsing is done in sam.c/bam_format1_core which
+ # requires a valid header.
+ return "\t".join(map(str, (self.query_name,
+ self.flag,
+ self.reference_id,
+ self.reference_start,
+ self.mapping_quality,
+ self.cigarstring,
+ self.next_reference_id,
+ self.next_reference_start,
+ self.query_alignment_length,
+ self.query_sequence,
+ self.query_qualities,
+ self.tags)))
+
+ def __copy__(self):
+ # the factory duplicates the underlying record, so the copy owns its
+ # own data; the AlignmentFile reference is shared
+ return makeAlignedSegment(self._delegate, self._alignment_file)
+
+ def __deepcopy__(self, memo):
+ # same as __copy__: the record is duplicated, the AlignmentFile
+ # reference is shared and *memo* is not used
+ return makeAlignedSegment(self._delegate, self._alignment_file)
+
+ def compare(self, AlignedSegment other):
+ '''return -1,0,1, if contents in this are binary
+ <,=,> to *other*
+
+ The fixed-size core of the records is compared first, then the
+ length of the variable-length data and finally the data itself.
+ Where the result comes from memcmp, only its sign is meaningful.
+
+ '''
+
+ cdef int retval, x
+ cdef bam1_t *t
+ cdef bam1_t *o
+
+ t = self._delegate
+ o = other._delegate
+
+ # uncomment for debugging purposes
+ # cdef unsigned char * oo, * tt
+ # tt = <unsigned char*>(&t.core)
+ # oo = <unsigned char*>(&o.core)
+ # for x from 0 <= x < sizeof( bam1_core_t): print x, tt[x], oo[x]
+ # tt = <unsigned char*>(t.data)
+ # oo = <unsigned char*>(o.data)
+ # for x from 0 <= x < max(t.l_data, o.l_data): print x, tt[x], oo[x], chr(tt[x]), chr(oo[x])
+
+ # Fast-path test for object identity
+ if t == o:
+ return 0
+
+ retval = memcmp(&t.core, &o.core, sizeof(bam1_core_t))
+
+ if retval:
+ return retval
+ # cmp(t.l_data, o.l_data)
+ retval = (t.l_data > o.l_data) - (t.l_data < o.l_data)
+ if retval:
+ return retval
+ # the lengths are equal at this point
+ return memcmp(t.data, o.data, t.l_data)
+
+ def __richcmp__(self, AlignedSegment other, int op):
+ # Only equality and inequality are supported, both based on
+ # compare(); for all ordering operators NotImplemented is returned.
+ if op == 2: # == operator
+ return self.compare(other) == 0
+ elif op == 3: # != operator
+ return self.compare(other) != 0
+ else:
+ return NotImplemented
+
+ def __hash__(self):
+ # The hash is computed from fields of the core structure only
+ # (tid, pos, qual, flag, isize, mtid, mpos); name, sequence, qualities
+ # and tags do not contribute.
+ cdef bam1_t * src
+ src = self._delegate
+ # shift and xor values in the core structure
+ # make sure tid and mtid are shifted by different amounts
+ # should variable length data be included?
+ cdef uint32_t hash_value = src.core.tid << 24 ^ \
+ src.core.pos << 16 ^ \
+ src.core.qual << 8 ^ \
+ src.core.flag ^ \
+ src.core.isize << 24 ^ \
+ src.core.mtid << 16 ^ \
+ src.core.mpos << 8
+
+ return hash_value
+
+    cpdef bytes tostring(self, AlignmentFile_t htsfile):
+        """returns a string representation of the aligned segment.
+
+        The output is a single line in SAM format, terminated by a
+        newline. Reference identifiers are translated into reference
+        names through *htsfile*, positions are written 1-based and
+        absent values (reference, mate reference, query name, CIGAR,
+        sequence, qualities) are written as ``*``.
+
+        Parameters
+        ----------
+
+        htsfile -- AlignmentFile object to map numerical
+                   identifers to chromosome names.
+        """
+        if self.reference_id < 0:
+            ref = "*"
+        else:
+            ref = htsfile.getrname(self.reference_id)
+
+        if self.rnext < 0:
+            mate_ref = "*"
+        elif self.rnext == self.reference_id:
+            mate_ref = "="
+        else:
+            mate_ref = htsfile.getrname(self.rnext)
+
+        # absent fields are reported as None by the properties; SAM
+        # requires a '*' in their place
+        query_name = self.query_name
+        if query_name is None:
+            query_name = "*"
+        cigarstring = self.cigarstring
+        if cigarstring is None:
+            cigarstring = "*"
+        seq = self.seq
+        if seq is None:
+            seq = "*"
+        qual = self.qual
+        if qual is None:
+            qual = "*"
+
+        # NOTE: the tag column is always written; for a read without tags
+        # the line therefore ends with a tab before the newline.
+        ret = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (
+            query_name, self.flag,
+            ref, self.pos + 1, self.mapq,
+            cigarstring,
+            mate_ref, self.mpos + 1,
+            self.template_length,
+            force_str(seq), force_str(qual),
+            force_str(self.get_tag_string()))
+        # convert explicitly: a str can not be cast to bytes on Python 3
+        return force_bytes(ret)
+
+    cdef bytes get_tag_string(self):
+        """return all tags of the read as tab-separated SAM text.
+
+        Each tag returned by ``get_tags(with_value_type=True)`` is
+        formatted with TagToString(). Returns empty bytes if there are
+        no tags.
+        """
+        cdef tuple tag
+        # TagToString returns bytes: convert to the native string type
+        # before joining and back to bytes for the return value
+        ret = "\t".join([
+            force_str(TagToString(tag)) for tag in
+            self.get_tags(with_value_type=True)])
+        return force_bytes(ret)
+
+ ########################################################
+ ## Basic attributes in order of appearance in SAM format
+    property query_name:
+        """the query template name (None if not present)
+
+        Assigning None or an empty name leaves the record unchanged.
+        Assigning a name longer than 254 characters raises a ValueError.
+        """
+        def __get__(self):
+            cdef bam1_t * src
+            src = self._delegate
+            if pysam_get_l_qname(src) == 0:
+                return None
+            return charptr_to_str(<char *>pysam_bam_get_qname(src))
+
+        def __set__(self, qname):
+            cdef bam1_t * src
+            cdef int l
+            cdef char * p
+
+            if qname is None or len(qname) == 0:
+                return
+            qname = force_bytes(qname)
+
+            # the stored length includes the terminating \0 and has to
+            # fit into the single byte that BAM reserves for it; the SAM
+            # specification limits names to 254 characters
+            if len(qname) > 254:
+                raise ValueError(
+                    "query name too long: %i > 254" % len(qname))
+
+            src = self._delegate
+            p = pysam_bam_get_qname(src)
+
+            # the qname is \0 terminated
+            l = len(qname) + 1
+            # resize the space reserved for the name within the record
+            pysam_bam_update(src,
+                             pysam_get_l_qname(src),
+                             l,
+                             <uint8_t*>p)
+
+            pysam_set_l_qname(src, l)
+
+            # re-acquire pointer to location in memory
+            # as it might have moved
+            p = pysam_bam_get_qname(src)
+
+            strncpy(p, qname, l)
+
+ property flag:
+ """the flag field of the alignment, as an integer.
+
+ The ``is_*`` properties of this class test and update single
+ bits of this value."""
+ def __get__(self):
+ return pysam_get_flag(self._delegate)
+ def __set__(self, flag):
+ pysam_set_flag(self._delegate, flag)
+
+ property reference_name:
+ """:term:`reference` name (None if no AlignmentFile is associated)
+
+ The name is looked up in the associated AlignmentFile using
+ :attr:`reference_id`. This property is read-only."""
+ def __get__(self):
+ if self._alignment_file is not None:
+ return self._alignment_file.getrname(self._delegate.core.tid)
+ return None
+
+ property reference_id:
+ """:term:`reference` ID
+
+ .. note::
+
+ This field contains the index of the reference sequence in
+ the sequence dictionary. To obtain the name of the
+ reference sequence, use
+ :meth:`pysam.AlignmentFile.getrname()`
+
+ """
+ # direct access to the tid field of the record; the value is not
+ # checked against any header when set
+ def __get__(self): return self._delegate.core.tid
+ def __set__(self, tid): self._delegate.core.tid = tid
+
+ property reference_start:
+ """0-based leftmost coordinate
+
+ Setting the position also recomputes the :attr:`bin` of the
+ record."""
+ def __get__(self): return self._delegate.core.pos
+ def __set__(self, pos):
+ ## setting the position requires updating the "bin" attribute
+ cdef bam1_t * src
+ src = self._delegate
+ src.core.pos = pos
+ # with a CIGAR the bin covers the aligned region up to
+ # bam_endpos(), without one a region of length 1 is used.
+ # NOTE(review): 14 and 5 are presumably the min_shift and n_lvls
+ # parameters of the standard BAM index -- confirm in htslib
+ if pysam_get_n_cigar(src):
+ pysam_set_bin(src,
+ hts_reg2bin(
+ src.core.pos,
+ bam_endpos(src),
+ 14,
+ 5))
+ else:
+ pysam_set_bin(src,
+ hts_reg2bin(
+ src.core.pos,
+ src.core.pos + 1,
+ 14,
+ 5))
+
+ property mapping_quality:
+ """mapping quality"""
+ # read and written through the pysam_get_qual/pysam_set_qual helpers
+ def __get__(self):
+ return pysam_get_qual(self._delegate)
+ def __set__(self, qual):
+ pysam_set_qual(self._delegate, qual)
+
+ property cigarstring:
+ '''the :term:`cigar` alignment as a string.
+
+ The cigar string is a string of alternating integers
+ and characters denoting the length and the type of
+ an operation.
+
+ .. note::
+ The order length,operation is specified in the
+ SAM format. It is different from the order of
+ the :attr:`cigar` property.
+
+ Returns None if not present.
+
+ To unset the cigarstring, assign None or the
+ empty string.
+
+ When setting, the operations are extracted with a regular
+ expression search; text that does not match
+ ``<length><operation>`` is skipped without an error.
+ '''
+ def __get__(self):
+ c = self.cigartuples
+ if c is None:
+ return None
+ # reverse order
+ # (cigartuples holds (operation, length), the string is written as
+ # length followed by the operation character)
+ else:
+ return "".join([ "%i%c" % (y,CODE2CIGAR[x]) for x,y in c])
+
+ def __set__(self, cigar):
+ if cigar is None or len(cigar) == 0:
+ self.cigartuples = []
+ else:
+ parts = CIGAR_REGEX.findall(cigar)
+ # reverse order
+ self.cigartuples = [(CIGAR2CODE[ord(y)], int(x)) for x,y in parts]
+
+ # TODO
+ # property cigar:
+ # """the cigar alignment"""
+
+ property next_reference_id:
+ """the :term:`reference` id of the mate/next read."""
+ # direct access to the mtid field of the record
+ def __get__(self): return self._delegate.core.mtid
+ def __set__(self, mtid):
+ self._delegate.core.mtid = mtid
+
+ property next_reference_name:
+ """:term:`reference` name of the mate/next read (None if no
+ AlignmentFile is associated)
+
+ The name is looked up in the associated AlignmentFile using
+ :attr:`next_reference_id`. This property is read-only."""
+ def __get__(self):
+ if self._alignment_file is not None:
+ return self._alignment_file.getrname(self._delegate.core.mtid)
+ return None
+
+ property next_reference_start:
+ """the position of the mate/next read."""
+ # direct access to the mpos field of the record
+ def __get__(self):
+ return self._delegate.core.mpos
+ def __set__(self, mpos):
+ self._delegate.core.mpos = mpos
+
+ property query_length:
+ """the length of the query/read.
+
+ This value corresponds to the length of the sequence supplied
+ in the BAM/SAM file. The length of a query is 0 if there is no
+ sequence in the BAM/SAM file. In those cases, the read length
+ can be inferred from the CIGAR alignment, see
+ :meth:`pysam.AlignmentFile.infer_query_length`.
+
+ The length includes soft-clipped bases and is equal to
+ ``len(query_sequence)``.
+
+ This property is read-only but can be set by providing a
+ sequence.
+
+ Returns 0 if not available.
+
+ """
+ def __get__(self):
+ # the stored sequence length; updated by the query_sequence setter
+ return self._delegate.core.l_qseq
+
+ property template_length:
+ """the observed query template length"""
+ # direct access to the isize field of the record
+ def __get__(self):
+ return self._delegate.core.isize
+ def __set__(self, isize):
+ self._delegate.core.isize = isize
+
+ property query_sequence:
+ """read sequence bases, including :term:`soft clipped` bases
+ (None if not present).
+
+ Note that assigning to seq will invalidate any quality scores.
+ Thus, to in-place edit the sequence and quality scores, copies of
+ the quality scores need to be taken. Consider trimming for example::
+
+ q = read.query_qualities
+ read.query_sequence = read.query_sequence[5:10]
+ read.query_qualities = q[5:10]
+
+ The sequence is returned as it is stored in the BAM file. Some mappers
+ might have stored a reverse complement of the original read
+ sequence.
+ """
+ def __get__(self):
+ # the decoded sequence is cached until a new sequence is assigned
+ if self.cache_query_sequence:
+ return self.cache_query_sequence
+
+ cdef bam1_t * src
+ cdef char * s
+ src = self._delegate
+
+ if src.core.l_qseq == 0:
+ return None
+
+ self.cache_query_sequence = getSequenceInRange(
+ src, 0, src.core.l_qseq)
+ return self.cache_query_sequence
+
+ def __set__(self, seq):
+ # samtools manages sequence and quality length memory together
+ # if no quality information is present, the first byte says 0xff.
+ cdef bam1_t * src
+ cdef uint8_t * p
+ cdef char * s
+ cdef int l, k
+ cdef Py_ssize_t nbytes_new, nbytes_old
+
+ if seq == None:
+ l = 0
+ else:
+ l = len(seq)
+ seq = force_bytes(seq)
+
+ src = self._delegate
+
+ # as the sequence is stored in half-bytes, the total length (sequence
+ # plus quality scores) is (l+1)/2 + l
+ nbytes_new = (l + 1) / 2 + l
+ nbytes_old = (src.core.l_qseq + 1) / 2 + src.core.l_qseq
+
+ # acquire pointer to location in memory
+ p = pysam_bam_get_seq(src)
+ src.core.l_qseq = l
+
+ # change length of data field
+ pysam_bam_update(src,
+ nbytes_old,
+ nbytes_new,
+ p)
+
+ if l > 0:
+ # re-acquire pointer to location in memory
+ # as it might have moved
+ p = pysam_bam_get_seq(src)
+ for k from 0 <= k < nbytes_new:
+ p[k] = 0
+ # convert to C string
+ s = seq
+ # pack two bases per byte: even positions go into the upper four
+ # bits, odd positions into the lower four bits
+ for k from 0 <= k < l:
+ p[k/2] |= seq_nt16_table[<unsigned char>s[k]] << 4 * (1 - k % 2)
+
+ # erase qualities
+ # NOTE(review): this must only happen for l > 0; for an empty
+ # sequence there is no quality byte to overwrite -- confirm the
+ # nesting of these two statements in the original file
+ p = pysam_bam_get_qual(src)
+ p[0] = 0xff
+
+ # NOTE(review): the value cached here went through force_bytes(),
+ # whereas the getter caches the result of getSequenceInRange();
+ # presumably the two differ in type on Python 3 -- confirm
+ self.cache_query_sequence = seq
+
+ # clear cached values for quality values
+ self.cache_query_qualities = None
+ self.cache_query_alignment_qualities = None
+
+    property query_qualities:
+        """read sequence base qualities, including :term:`soft
+        clipped` bases (None if not present).
+
+        Quality scores are returned as a python array of unsigned
+        chars. Note that this is not the ASCII-encoded value typically
+        seen in FASTQ or SAM formatted files. Thus, no offset of 33
+        needs to be subtracted.
+
+        Note that to set quality scores the sequence has to be set
+        beforehand as this will determine the expected length of the
+        quality score array.
+
+        Assigning None or an empty sequence removes the quality scores.
+
+        This method raises a ValueError if the length of the
+        quality scores and the sequence are not the same.
+
+        """
+        def __get__(self):
+            cdef bam1_t * src
+
+            # the array is cached until sequence or qualities are assigned
+            if self.cache_query_qualities:
+                return self.cache_query_qualities
+
+            src = self._delegate
+
+            if src.core.l_qseq == 0:
+                return None
+
+            self.cache_query_qualities = getQualitiesInRange(
+                src, 0, src.core.l_qseq)
+            return self.cache_query_qualities
+
+        def __set__(self, qual):
+            # note that memory is already allocated via setting the
+            # sequence, hence the lengths of sequence and qualities are
+            # checked for a match.
+            cdef bam1_t * src
+            cdef uint8_t * p
+            cdef int l
+            cdef c_array.array result
+
+            src = self._delegate
+            p = pysam_bam_get_qual(src)
+            if qual is None or len(qual) == 0:
+                # if absent and there is a sequence: set to 0xff
+                if src.core.l_qseq != 0:
+                    p[0] = 0xff
+                # forget previously cached values, otherwise the getter
+                # keeps returning the qualities that were just removed
+                self.cache_query_qualities = None
+                self.cache_query_alignment_qualities = None
+                return
+
+            # check for length match
+            l = len(qual)
+            if src.core.l_qseq != l:
+                raise ValueError(
+                    "quality and sequence mismatch: %i != %i" %
+                    (l, src.core.l_qseq))
+
+            # create a python array object filling it
+            # with the quality scores
+
+            # NB: should avoid this copying if qual is
+            # already of the correct type.
+            result = c_array.array('B', qual)
+
+            # copy data
+            memcpy(p, result.data.as_voidptr, l)
+
+            # save in cache; the cached qualities of the aligned part
+            # belong to the previous values and are dropped
+            self.cache_query_qualities = qual
+            self.cache_query_alignment_qualities = None
+
+ property bin:
+ """the bin of the alignment.
+
+ The bin is recomputed when :attr:`reference_start` is set."""
+ def __get__(self):
+ return pysam_get_bin(self._delegate)
+ def __set__(self, bin):
+ pysam_set_bin(self._delegate, bin)
+
+
+ ##########################################################
+ # Derived simple attributes. These are simple attributes of
+ # AlignedSegment getting and setting values.
+ ##########################################################
+ # 1. Flags
+ ##########################################################
+ # Each of the following properties tests one bit of the flag field in
+ # its getter and passes the assigned value together with the bit mask
+ # to pysam_update_flag() in its setter.
+ property is_paired:
+ """true if read is paired in sequencing"""
+ def __get__(self):
+ return (self.flag & BAM_FPAIRED) != 0
+ def __set__(self,val):
+ pysam_update_flag(self._delegate, val, BAM_FPAIRED)
+
+ property is_proper_pair:
+ """true if read is mapped in a proper pair"""
+ def __get__(self):
+ return (self.flag & BAM_FPROPER_PAIR) != 0
+ def __set__(self,val):
+ pysam_update_flag(self._delegate, val, BAM_FPROPER_PAIR)
+ property is_unmapped:
+ """true if read itself is unmapped"""
+ def __get__(self):
+ return (self.flag & BAM_FUNMAP) != 0
+ def __set__(self, val):
+ pysam_update_flag(self._delegate, val, BAM_FUNMAP)
+ property mate_is_unmapped:
+ """true if the mate is unmapped"""
+ def __get__(self):
+ return (self.flag & BAM_FMUNMAP) != 0
+ def __set__(self,val):
+ pysam_update_flag(self._delegate, val, BAM_FMUNMAP)
+ property is_reverse:
+ """true if read is mapped to reverse strand"""
+ def __get__(self):
+ return (self.flag & BAM_FREVERSE) != 0
+ def __set__(self,val):
+ pysam_update_flag(self._delegate, val, BAM_FREVERSE)
+ property mate_is_reverse:
+ """true if the mate is mapped to reverse strand"""
+ def __get__(self):
+ return (self.flag & BAM_FMREVERSE) != 0
+ def __set__(self,val):
+ pysam_update_flag(self._delegate, val, BAM_FMREVERSE)
+ property is_read1:
+ """true if this is read1"""
+ def __get__(self):
+ return (self.flag & BAM_FREAD1) != 0
+ def __set__(self,val):
+ pysam_update_flag(self._delegate, val, BAM_FREAD1)
+ property is_read2:
+ """true if this is read2"""
+ def __get__(self):
+ return (self.flag & BAM_FREAD2) != 0
+ def __set__(self, val):
+ pysam_update_flag(self._delegate, val, BAM_FREAD2)
+ property is_secondary:
+ """true if not primary alignment"""
+ def __get__(self):
+ return (self.flag & BAM_FSECONDARY) != 0
+ def __set__(self, val):
+ pysam_update_flag(self._delegate, val, BAM_FSECONDARY)
+ property is_qcfail:
+ """true if QC failure"""
+ def __get__(self):
+ return (self.flag & BAM_FQCFAIL) != 0
+ def __set__(self, val):
+ pysam_update_flag(self._delegate, val, BAM_FQCFAIL)
+ property is_duplicate:
+ """true if optical or PCR duplicate"""
+ def __get__(self):
+ return (self.flag & BAM_FDUP) != 0
+ def __set__(self, val):
+ pysam_update_flag(self._delegate, val, BAM_FDUP)
+ property is_supplementary:
+ """true if this is a supplementary alignment"""
+ def __get__(self):
+ return (self.flag & BAM_FSUPPLEMENTARY) != 0
+ def __set__(self, val):
+ pysam_update_flag(self._delegate, val, BAM_FSUPPLEMENTARY)
+
+ # 2. Coordinates and lengths
+ property reference_end:
+     '''end of the alignment on the reference genome.
+
+     reference_end points to one past the last aligned residue.
+     None is returned if the read is flagged as unmapped or if it
+     carries no cigar alignment.
+
+     '''
+     def __get__(self):
+         cdef bam1_t * src = self._delegate
+
+         # no end position without a mapped, aligned read
+         if self.flag & BAM_FUNMAP:
+             return None
+         if pysam_get_n_cigar(src) == 0:
+             return None
+
+         return bam_endpos(src)
+
+ property reference_length:
+     '''length of the alignment on the reference genome.
+
+     This is equal to `aend - pos`. None is returned if the read is
+     flagged as unmapped or if it carries no cigar alignment.'''
+     def __get__(self):
+         cdef bam1_t * src = self._delegate
+
+         if self.flag & BAM_FUNMAP:
+             return None
+         if pysam_get_n_cigar(src) == 0:
+             return None
+
+         return bam_endpos(src) - src.core.pos
+
+ property query_alignment_sequence:
+     """aligned portion of the read.
+
+     This is the part of :attr:`seq` without the flanking bases that
+     were :term:`soft clipped`, i.e. ``seq[qstart:qend]``. None is
+     returned if the read has no sequence.
+
+     SAM/BAM files may include extra flanking bases that are not
+     part of the alignment. These bases may be the result of the
+     Smith-Waterman or other algorithms, which may not require
+     alignments that begin at the first residue or end at the last.
+     In addition, extra sequencing adapters, multiplex identifiers,
+     and low-quality bases that were not considered for alignment
+     may have been retained.
+
+     The value is cached on first access.
+
+     """
+
+     def __get__(self):
+         cdef bam1_t * src
+         cdef uint32_t first, last
+
+         # serve repeated requests from the cache
+         cached = self.cache_query_alignment_sequence
+         if cached:
+             return cached
+
+         src = self._delegate
+         if src.core.l_qseq == 0:
+             return None
+
+         first = getQueryStart(src)
+         last = getQueryEnd(src)
+
+         aligned = getSequenceInRange(src, first, last)
+         self.cache_query_alignment_sequence = aligned
+         return aligned
+
+ property query_alignment_qualities:
+     """quality values of the aligned portion of the read (None if
+     not present). These are the quality values that correspond to
+     :attr:`query`, that is, they exclude qualities of
+     :term:`soft clipped` bases. This is equal to
+     ``qual[qstart:qend]``.
+
+     Quality scores are returned as a python array of unsigned
+     chars. Note that this is not the ASCII-encoded value typically
+     seen in FASTQ or SAM formatted files. Thus, no offset of 33
+     needs to be subtracted.
+
+     The value is cached on first access. This property is
+     read-only.
+
+     """
+     def __get__(self):
+         cdef bam1_t * src
+         cdef uint32_t first, last
+
+         # serve repeated requests from the cache
+         cached = self.cache_query_alignment_qualities
+         if cached:
+             return cached
+
+         src = self._delegate
+         if src.core.l_qseq == 0:
+             return None
+
+         first = getQueryStart(src)
+         last = getQueryEnd(src)
+
+         aligned = getQualitiesInRange(src, first, last)
+         self.cache_query_alignment_qualities = aligned
+         return aligned
+
+ property query_alignment_start:
+ """start index of the aligned query portion of the sequence (0-based,
+ inclusive).
+
+ This is the index of the first base in :attr:`seq` that is not
+ soft-clipped.
+
+ This property defines no setter.
+
+ """
+ def __get__(self):
+ # computed on every access by the helper getQueryStart
+ return getQueryStart(self._delegate)
+
+ property query_alignment_end:
+ """end index of the aligned query portion of the sequence (0-based,
+ exclusive).
+
+ This property defines no setter."""
+ def __get__(self):
+ # computed on every access by the helper getQueryEnd
+ return getQueryEnd(self._delegate)
+
+ property query_alignment_length:
+     """length of the aligned portion of the query sequence.
+
+     This is equal to :attr:`qend` - :attr:`qstart`"""
+     def __get__(self):
+         cdef bam1_t * src = self._delegate
+         return getQueryEnd(src) - getQueryStart(src)
+
+ #####################################################
+ # Computed properties
+
+ def get_reference_positions(self, full_length=False):
+     """a list of reference positions that this read aligns to.
+
+     By default, this method only returns positions in the
+     reference that are within the alignment. If *full_length* is
+     set, None values will be included for any soft-clipped or
+     unaligned positions within the read. The returned list will
+     thus be of the same length as the read.
+
+     Aligned positions are reported for the CIGAR operations M, =
+     and X. Deletions and reference skips only advance the
+     reference position. An empty list is returned if the read has
+     no CIGAR alignment.
+
+     """
+     cdef uint32_t k, i, pos
+     cdef int op
+     cdef uint32_t * cigar_p
+     cdef bam1_t * src
+     cdef bint _full = full_length
+
+     src = self._delegate
+     if pysam_get_n_cigar(src) == 0:
+         return []
+
+     result = []
+     pos = src.core.pos
+     cigar_p = pysam_bam_get_cigar(src)
+
+     for k from 0 <= k < pysam_get_n_cigar(src):
+         op = cigar_p[k] & BAM_CIGAR_MASK
+         l = cigar_p[k] >> BAM_CIGAR_SHIFT
+
+         if op == BAM_CSOFT_CLIP or op == BAM_CINS:
+             # query bases without a reference position
+             if _full:
+                 for i from 0 <= i < l:
+                     result.append(None)
+         elif op == BAM_CMATCH or op == BAM_CEQUAL or op == BAM_CDIFF:
+             # '=' and 'X' consume query and reference exactly like
+             # 'M' and must not be skipped
+             for i from pos <= i < pos + l:
+                 result.append(i)
+             pos += l
+         elif op == BAM_CDEL or op == BAM_CREF_SKIP:
+             pos += l
+
+     return result
+
+ def infer_query_length(self, always=True):
+     """read length as inferred from the CIGAR string.
+
+     With *always* set to True the length is inferred from the
+     CIGAR string in every case. With *always* set to False the
+     length of the stored read sequence is returned whenever a
+     sequence is available, and the CIGAR string is only consulted
+     otherwise.
+
+     Returns None if CIGAR string is not present.
+     """
+     cdef bam1_t * src = self._delegate
+
+     # inference is forced, or there is no stored sequence to use
+     if always or src.core.l_qseq == 0:
+         return calculateQueryLength(src)
+
+     return src.core.l_qseq
+
+ def get_reference_sequence(self):
+ """return the reference sequence.
+
+ This method requires the MD tag to be set.
+
+ The result is whatever the helper reconstituteSequenceFromMD
+ returns for this record; :meth:`get_aligned_pairs` treats a
+ None result of that helper as a missing MD tag.
+ """
+ return reconstituteSequenceFromMD(self._delegate)
+
+
+ def get_aligned_pairs(self, matches_only=False, with_seq=False):
+     """a list of aligned read (query) and reference positions.
+
+     For inserts, deletions, skipping either query or reference
+     position may be None.
+
+     Padding is currently not supported and leads to an exception.
+
+     Parameters
+     ----------
+
+     matches_only : bool
+
+       If True, only matched bases are returned - no None on either
+       side.
+
+     with_seq : bool
+
+       If True, return a third element in the tuple containing the
+       reference sequence. Substitutions are lower-case. This option
+       requires an MD tag to be present.
+
+     Returns
+     -------
+
+     aligned_pairs : list of tuples
+
+       An empty list is returned if the read has no CIGAR alignment.
+
+     Raises
+     ------
+
+     ValueError
+       If *with_seq* is set and no reference sequence could be
+       reconstructed from the MD tag.
+
+     NotImplementedError
+       If the CIGAR string contains a padding operation.
+
+     """
+     cdef uint32_t k, i, pos, qpos, r_idx
+     cdef int op
+     cdef uint32_t * cigar_p
+     cdef bam1_t * src = self._delegate
+     cdef bint _matches_only = bool(matches_only)
+     cdef bint _with_seq = bool(with_seq)
+
+     # TODO: this method performs no checking and assumes that
+     # read sequence, cigar and MD tag are consistent.
+
+     if _with_seq:
+         ref_seq = reconstituteSequenceFromMD(src)
+         if ref_seq is None:
+             raise ValueError("MD tag not present")
+
+     # index of the next unused base in ref_seq
+     r_idx = 0
+
+     if pysam_get_n_cigar(src) == 0:
+         return []
+
+     result = []
+     pos = src.core.pos
+     qpos = 0
+     cigar_p = pysam_bam_get_cigar(src)
+
+     for k from 0 <= k < pysam_get_n_cigar(src):
+         op = cigar_p[k] & BAM_CIGAR_MASK
+         l = cigar_p[k] >> BAM_CIGAR_SHIFT
+
+         if op == BAM_CMATCH or op == BAM_CEQUAL or op == BAM_CDIFF:
+             # advances query, reference and reference sequence
+             if _with_seq:
+                 for i from pos <= i < pos + l:
+                     result.append((qpos, i, ref_seq[r_idx]))
+                     r_idx += 1
+                     qpos += 1
+             else:
+                 for i from pos <= i < pos + l:
+                     result.append((qpos, i))
+                     qpos += 1
+             pos += l
+
+         elif op == BAM_CINS or op == BAM_CSOFT_CLIP:
+             # advances the query only
+             if not _matches_only:
+                 if _with_seq:
+                     for i from pos <= i < pos + l:
+                         result.append((qpos, None, None))
+                         qpos += 1
+                 else:
+                     for i from pos <= i < pos + l:
+                         result.append((qpos, None))
+                         qpos += 1
+             else:
+                 qpos += l
+
+         elif op == BAM_CDEL or op == BAM_CREF_SKIP:
+             # advances the reference only
+             if not _matches_only:
+                 if _with_seq:
+                     for i from pos <= i < pos + l:
+                         result.append((None, i, ref_seq[r_idx]))
+                         r_idx += 1
+                 else:
+                     for i from pos <= i < pos + l:
+                         result.append((None, i))
+             elif _with_seq:
+                 # the pairs are suppressed, but the reference bases
+                 # still have to be consumed. Otherwise all matches
+                 # after this operation would be paired with the
+                 # wrong base of ref_seq.
+                 # NOTE(review): like the branch above, this assumes
+                 # ref_seq holds one base per deleted/skipped
+                 # reference position - confirm against
+                 # reconstituteSequenceFromMD.
+                 r_idx += l
+             pos += l
+
+         elif op == BAM_CHARD_CLIP:
+             pass # advances neither
+
+         elif op == BAM_CPAD:
+             raise NotImplementedError(
+                 "Padding (BAM_CPAD, 6) is currently not supported. "
+                 "Please implement. Sorry about that.")
+
+     return result
+
+ def get_blocks(self):
+     """ a list of start and end positions of
+     aligned gapless blocks.
+
+     The start and end positions are in genomic
+     coordinates.
+
+     One block is reported for every M, = or X operation in the
+     CIGAR string. Blocks are not normalized, i.e. two blocks
+     might be directly adjacent. This happens if
+     the two blocks are separated by an insertion
+     in the read.
+
+     An empty list is returned if the read has no CIGAR alignment.
+     """
+
+     cdef uint32_t k, pos, l
+     cdef int op
+     cdef uint32_t * cigar_p
+     cdef bam1_t * src
+
+     src = self._delegate
+     if pysam_get_n_cigar(src) == 0:
+         return []
+
+     result = []
+     pos = src.core.pos
+     cigar_p = pysam_bam_get_cigar(src)
+     l = 0
+
+     for k from 0 <= k < pysam_get_n_cigar(src):
+         op = cigar_p[k] & BAM_CIGAR_MASK
+         l = cigar_p[k] >> BAM_CIGAR_SHIFT
+         if op == BAM_CMATCH or op == BAM_CEQUAL or op == BAM_CDIFF:
+             # '=' and 'X' are aligned columns just like 'M'
+             result.append((pos, pos + l))
+             pos += l
+         elif op == BAM_CDEL or op == BAM_CREF_SKIP:
+             # gaps in the read only move the reference position
+             pos += l
+
+     return result
+
+ def get_overlap(self, uint32_t start, uint32_t end):
+     """return number of aligned bases of read overlapping the interval
+     *start* and *end* on the reference sequence.
+
+     Bases aligned through the CIGAR operations M, = and X are
+     counted. Deletions and reference skips only advance the
+     reference position.
+
+     Return None if cigar alignment is not available.
+     """
+     cdef uint32_t k, pos, overlap
+     cdef int op, o
+     cdef uint32_t * cigar_p
+     cdef bam1_t * src
+
+     overlap = 0
+
+     src = self._delegate
+     if pysam_get_n_cigar(src) == 0:
+         return None
+     pos = src.core.pos
+     o = 0
+
+     cigar_p = pysam_bam_get_cigar(src)
+     for k from 0 <= k < pysam_get_n_cigar(src):
+         op = cigar_p[k] & BAM_CIGAR_MASK
+         l = cigar_p[k] >> BAM_CIGAR_SHIFT
+
+         if op == BAM_CMATCH or op == BAM_CEQUAL or op == BAM_CDIFF:
+             # size of the intersection of [pos, pos + l) with
+             # [start, end); not positive if they are disjoint
+             o = min( pos + l, end) - max( pos, start )
+             if o > 0: overlap += o
+             pos += l
+         elif op == BAM_CDEL or op == BAM_CREF_SKIP:
+             pos += l
+
+     return overlap
+
+ #####################################################
+ ## Unsorted as yet
+ # TODO: capture in CIGAR object
+ property cigartuples:
+ """the :term:`cigar` alignment. The alignment
+ is returned as a list of tuples of (operation, length).
+
+ If the alignment is not present, None is returned.
+
+ The operations are:
+
+ +-----+--------------+-----+
+ |M |BAM_CMATCH |0 |
+ +-----+--------------+-----+
+ |I |BAM_CINS |1 |
+ +-----+--------------+-----+
+ |D |BAM_CDEL |2 |
+ +-----+--------------+-----+
+ |N |BAM_CREF_SKIP |3 |
+ +-----+--------------+-----+
+ |S |BAM_CSOFT_CLIP|4 |
+ +-----+--------------+-----+
+ |H |BAM_CHARD_CLIP|5 |
+ +-----+--------------+-----+
+ |P |BAM_CPAD |6 |
+ +-----+--------------+-----+
+ |= |BAM_CEQUAL |7 |
+ +-----+--------------+-----+
+ |X |BAM_CDIFF |8 |
+ +-----+--------------+-----+
+
+ .. note::
+ The output is a list of (operation, length) tuples, such as
+ ``[(0, 30)]``.
+ This is different from the SAM specification and
+ the :attr:`cigarstring` property, which uses a
+ (length, operation) order, for example: ``30M``.
+
+ To unset the cigar property, assign an empty list
+ or None.
+ """
+ def __get__(self):
+     # Decode the packed cigar array into (operation, length)
+     # tuples; None if the record carries no cigar operations.
+     cdef bam1_t * src = self._delegate
+     cdef uint32_t * cigar_p
+     cdef uint32_t op, l
+     cdef int k, ncigar
+
+     ncigar = pysam_get_n_cigar(src)
+     if ncigar == 0:
+         return None
+
+     cigar_p = pysam_bam_get_cigar(src)
+     cigar = []
+     for k in range(ncigar):
+         # operation and length are packed into one 32 bit value
+         op = cigar_p[k] & BAM_CIGAR_MASK
+         l = cigar_p[k] >> BAM_CIGAR_SHIFT
+         cigar.append((op, l))
+     return cigar
+
+ def __set__(self, values):
+ # Replace the cigar of the record with *values*, a sequence of
+ # (operation, length) tuples. None is treated like an empty
+ # sequence and removes the cigar.
+ cdef uint32_t * p
+ cdef bam1_t * src
+ # op and l are untyped and hence plain python objects
+ cdef op, l
+ cdef int k, ncigar
+
+ k = 0
+
+ src = self._delegate
+
+ # get location of cigar string
+ p = pysam_bam_get_cigar(src)
+
+ # empty values for cigar string
+ if values is None:
+ values = []
+
+ ncigar = len(values)
+ # create space for cigar data within src.data
+ # (sizes are in bytes, each cigar operation takes 4 bytes)
+ pysam_bam_update(src,
+ pysam_get_n_cigar(src) * 4,
+ ncigar * 4,
+ <uint8_t*>p)
+
+ # length is number of cigar operations, not bytes
+ pysam_set_n_cigar(src, ncigar)
+
+ # re-acquire pointer to location in memory
+ # as it might have moved
+ p = pysam_bam_get_cigar(src)
+
+ # insert cigar operations, packing the length into the bits
+ # above BAM_CIGAR_SHIFT and the operation into the bits below
+ for op, l in values:
+ p[k] = l << BAM_CIGAR_SHIFT | op
+ k += 1
+
+ ## setting the cigar string requires updating the bin
+ # NOTE(review): 14 and 5 are presumably the min_shift and n_lvls
+ # arguments of htslib's hts_reg2bin - confirm against hts.h
+ pysam_set_bin(src,
+ hts_reg2bin(
+ src.core.pos,
+ bam_endpos(src),
+ 14,
+ 5))
+
+
+
+ cpdef set_tag(self,
+ tag,
+ value,
+ value_type=None,
+ replace=True):
+ """sets a particular field *tag* to *value* in the optional alignment
+ section.
+
+ *value_type* describes the type of *value* that is to be entered
+ into the alignment record. It can be set explicitly to one
+ of the valid one-letter type codes. If unset, an appropriate
+ type will be chosen automatically.
+
+ An existing value of the same *tag* will be overwritten unless
+ replace is set to False. This is usually not recommended as a
+ tag may only appear once in the optional alignment section.
+
+ If *value* is None, the tag will be deleted.
+
+ A ValueError is raised if *tag* is not two characters long, if
+ no type code can be determined for *value*, or if the type code
+ is not one of Z, i, d, f or B.
+ """
+
+ cdef int value_size
+ cdef uint8_t * value_ptr
+ cdef uint8_t *existing_ptr
+ cdef uint8_t typecode
+ cdef float float_value
+ cdef double double_value
+ cdef int32_t int_value
+ cdef bam1_t * src = self._delegate
+ cdef char * _value_type
+ cdef c_array.array array_value
+ cdef object buffer
+
+ if len(tag) != 2:
+ raise ValueError('Invalid tag: %s' % tag)
+
+ tag = force_bytes(tag)
+ if replace:
+ # remove the existing entry first; the new value is appended
+ # further below
+ existing_ptr = bam_aux_get(src, tag)
+ if existing_ptr:
+ bam_aux_del(src, existing_ptr)
+
+ # setting value to None deletes a tag
+ if value is None:
+ return
+
+ # NOTE: with replace set, an existing tag has already been
+ # removed at this point, even if one of the checks below raises
+ typecode = get_value_code(value, value_type)
+ if typecode == 0:
+ raise ValueError("can't guess type or invalid type code specified")
+
+ # Not Endian-safe, but then again neither is samtools!
+ # For the scalar types, value_ptr points at a local C variable
+ # (or the bytes object for 'Z') that holds the data until
+ # bam_aux_append is called at the end of the method.
+ if typecode == 'Z':
+ value = force_bytes(value)
+ value_ptr = <uint8_t*><char*>value
+ # +1 to include the terminating NUL byte
+ value_size = len(value)+1
+ elif typecode == 'i':
+ int_value = value
+ value_ptr = <uint8_t*>&int_value
+ value_size = sizeof(int32_t)
+ elif typecode == 'd':
+ double_value = value
+ value_ptr = <uint8_t*>&double_value
+ value_size = sizeof(double)
+ elif typecode == 'f':
+ float_value = value
+ value_ptr = <uint8_t*>&float_value
+ value_size = sizeof(float)
+ elif typecode == 'B':
+ # the following goes through python, needs to be cleaned up
+ # pack array using struct
+ if value_type is None:
+ fmt, args = packTags([(tag, value)])
+ else:
+ fmt, args = packTags([(tag, value, value_type)])
+
+ # remove tag and type code as set by bam_aux_append
+ # first four chars of format (<2sc)
+ fmt = '<' + fmt[4:]
+ # first two values to pack
+ args = args[2:]
+ value_size = struct.calcsize(fmt)
+ # buffer will be freed when object goes out of scope
+ buffer = ctypes.create_string_buffer(value_size)
+ struct.pack_into(fmt, buffer, 0, *args)
+ # bam_aux_append copies data from value_ptr
+ bam_aux_append(src,
+ tag,
+ typecode,
+ value_size,
+ <uint8_t*>buffer.raw)
+ return
+ else:
+ raise ValueError('unsupported value_type in set_option')
+
+ # append the scalar or string value prepared above
+ bam_aux_append(src,
+ tag,
+ typecode,
+ value_size,
+ value_ptr)
+
+ cpdef has_tag(self, tag):
+     """returns true if the optional alignment section
+     contains a given *tag*."""
+     cdef uint8_t * aux_ptr
+     tag_bytes = force_bytes(tag)
+     # bam_aux_get yields NULL for a tag that is not present
+     aux_ptr = bam_aux_get(self._delegate, tag_bytes)
+     return aux_ptr != NULL
+
+ cpdef get_tag(self, tag, with_value_type=False):
+     """
+     retrieves data from the optional alignment section
+     given a two-letter *tag* denoting the field.
+
+     The returned value is cast into an appropriate python type.
+
+     This method is the fastest way to access the optional
+     alignment section if only few tags need to be retrieved.
+
+     Parameters
+     ----------
+
+     tag :
+         data tag.
+
+     with_value_type : Optional[bool]
+         if set to True, the return value is a tuple of (tag value, type code).
+         (default False)
+
+     Returns
+     -------
+
+     A python object with the value of the `tag`. The type of the
+     object depends on the data type in the data record.
+
+     Raises
+     ------
+
+     KeyError
+         If `tag` is not present, a KeyError is raised.
+
+     ValueError
+         If the type code stored with the tag is not known.
+
+     """
+     cdef uint8_t * v
+     cdef int nvalues
+     btag = force_bytes(tag)
+     v = bam_aux_get(self._delegate, btag)
+     if v == NULL:
+         raise KeyError("tag '%s' not present" % tag)
+     # array tags carry a second byte with the type of the elements
+     if chr(v[0]) == "B":
+         auxtype = chr(v[0]) + chr(v[1])
+     else:
+         auxtype = chr(v[0])
+
+     if auxtype == 'c' or auxtype == 'C' or auxtype == 's' or auxtype == 'S':
+         value = <int>bam_aux2i(v)
+     elif auxtype == 'i' or auxtype == 'I':
+         value = <int32_t>bam_aux2i(v)
+     elif auxtype == 'f' or auxtype == 'F':
+         value = <float>bam_aux2f(v)
+     elif auxtype == 'd' or auxtype == 'D':
+         value = <double>bam_aux2f(v)
+     elif auxtype == 'A':
+         # there might a more efficient way
+         # to convert a char into a string
+         value = '%c' % <char>bam_aux2A(v)
+     elif auxtype == 'Z' or auxtype == 'H':
+         # hex strings ('H') are stored as NUL terminated strings
+         # like 'Z' and are decoded the same way in get_tags
+         value = charptr_to_str(<char*>bam_aux2Z(v))
+     elif auxtype[0] == 'B':
+         bytesize, nvalues, values = convert_binary_tag(v + 1)
+         value = values
+     else:
+         raise ValueError("unknown auxiliary type '%s'" % auxtype)
+
+     if with_value_type:
+         return (value, auxtype)
+     else:
+         return value
+
+ def get_tags(self, with_value_type=False):
+     """the fields in the optional alignment section.
+
+     Returns a list of all fields in the optional
+     alignment section. Values are converted to appropriate python
+     values. For example:
+
+     [(NM, 2), (RG, "GJP00TM04")]
+
+     If *with_value_type* is set, the value type as encoded in
+     the AlignedSegment record will be returned as well, as a
+     one-letter string:
+
+     [(NM, 2, "i"), (RG, "GJP00TM04", "Z")]
+
+     This method will convert all values in the optional alignment
+     section. When getting only one or few tags, please see
+     :meth:`get_tag` for a quicker way to achieve this.
+
+     A KeyError is raised if a field with an unknown type code is
+     encountered.
+
+     """
+
+     cdef bam1_t * src
+     cdef uint8_t * s
+     cdef char auxtag[3]
+     cdef char auxtype
+     cdef uint8_t byte_size
+     cdef int32_t nvalues
+
+     src = self._delegate
+     if src.l_data == 0:
+         return []
+     s = pysam_bam_get_aux(src)
+     result = []
+     auxtag[2] = 0
+     while s < (src.data + src.l_data):
+         # get tag
+         auxtag[0] = s[0]
+         auxtag[1] = s[1]
+         s += 2
+         # s now points at the type code. Each branch below moves s
+         # to the last byte of the value; the final "s += 1" steps
+         # on to the next field.
+         auxtype = s[0]
+         if auxtype in ('c', 'C'):
+             value = <int>bam_aux2i(s)
+             s += 1
+         elif auxtype in ('s', 'S'):
+             value = <int>bam_aux2i(s)
+             s += 2
+         elif auxtype in ('i', 'I'):
+             value = <int32_t>bam_aux2i(s)
+             s += 4
+         elif auxtype == 'f':
+             value = <float>bam_aux2f(s)
+             s += 4
+         elif auxtype == 'd':
+             value = <double>bam_aux2f(s)
+             s += 8
+         elif auxtype == 'A':
+             value = "%c" % <char>bam_aux2A(s)
+             s += 1
+         elif auxtype in ('Z', 'H'):
+             value = charptr_to_str(<char*>bam_aux2Z(s))
+             # +1 for NULL terminated string
+             s += len(value) + 1
+         elif auxtype == 'B':
+             s += 1
+             byte_size, nvalues, value = convert_binary_tag(s)
+             # 5 for 1 char and 1 int
+             s += 5 + (nvalues * byte_size) - 1
+         else:
+             raise KeyError("unknown type '%s'" % auxtype)
+
+         s += 1
+
+         if with_value_type:
+             # auxtype is a C char and would be converted to an
+             # integer; chr() yields the documented one-letter code
+             result.append(
+                 (charptr_to_str(auxtag), value, chr(auxtype)))
+         else:
+             result.append((charptr_to_str(auxtag), value))
+
+     return result
+
+ def set_tags(self, tags):
+ """sets the fields in the optional alignment section with
+ a list of (tag, value) tuples.
+
+ The :term:`value type` of the values is determined from the
+ python type. Optionally, a type may be given explicitly as
+ a third value in the tuple. For example:
+
+ x.set_tags([(NM, 2, "i"), (RG, "GJP00TM04", "Z")])
+
+ All existing fields are replaced. Passing None or an empty
+ list leaves the optional alignment section empty.
+
+ This method will not enforce the rule that the same tag may appear
+ only once in the optional alignment section.
+ """
+
+ cdef bam1_t * src
+ cdef uint8_t * s
+ cdef char * temp
+ cdef int new_size = 0
+ cdef int old_size
+ src = self._delegate
+
+ # convert and pack the data
+ if tags is not None and len(tags) > 0:
+ fmt, args = packTags(tags)
+ new_size = struct.calcsize(fmt)
+ buffer = ctypes.create_string_buffer(new_size)
+ struct.pack_into(fmt,
+ buffer,
+ 0,
+ *args)
+
+ # delete the old data and allocate new space.
+ # If new_size == 0, the aux field will be
+ # empty
+ old_size = pysam_bam_get_l_aux(src)
+ pysam_bam_update(src,
+ old_size,
+ new_size,
+ pysam_bam_get_aux(src))
+
+ # copy data only if there is any
+ if new_size > 0:
+
+ # get location of new data
+ s = pysam_bam_get_aux(src)
+
+ # check if there is direct path from buffer.raw to tmp
+ p = buffer.raw
+ # create handle to make sure buffer stays alive long
+ # enough for memcpy, see issue 129
+ temp = p
+ memcpy(s, temp, new_size)
+
+
+
+ ########################################################
+ # Compatibility Accessors
+ # Functions, properties for compatibility with pysam < 0.8
+ #
+ # Several options
+ # change the factory functions according to API
+ # * requires code changes throughout, incl passing
+ # handles to factory functions
+ # subclass functions and add attributes at runtime
+ # e.g.: AlignedSegments.qname = AlignedSegments.query_name
+ # * will slow down the default interface
+ # explicit declaration of getters/setters
+ ########################################################
+ # Each property below is a pure alias: the getter returns the
+ # attribute or method result of the current API, the setter (where
+ # defined) assigns to that same attribute.
+ property qname:
+ def __get__(self): return self.query_name
+ def __set__(self, v): self.query_name = v
+ property tid:
+ def __get__(self): return self.reference_id
+ def __set__(self, v): self.reference_id = v
+ property pos:
+ def __get__(self): return self.reference_start
+ def __set__(self, v): self.reference_start = v
+ property mapq:
+ def __get__(self): return self.mapping_quality
+ def __set__(self, v): self.mapping_quality = v
+ property rnext:
+ def __get__(self): return self.next_reference_id
+ def __set__(self, v): self.next_reference_id = v
+ property pnext:
+ def __get__(self):
+ return self.next_reference_start
+ def __set__(self, v):
+ self.next_reference_start = v
+ property cigar:
+ def __get__(self):
+ # unlike cigartuples, a missing cigar is reported as an
+ # empty list instead of None
+ r = self.cigartuples
+ if r is None:
+ r = []
+ return r
+ def __set__(self, v): self.cigartuples = v
+ property tlen:
+ def __get__(self):
+ return self.template_length
+ def __set__(self, v):
+ self.template_length = v
+ property seq:
+ def __get__(self):
+ return self.query_sequence
+ def __set__(self, v):
+ self.query_sequence = v
+ property qual:
+ # qualities are exchanged as a string here and converted from/to
+ # the array used by query_qualities
+ def __get__(self):
+ return array_to_qualitystring(self.query_qualities)
+ def __set__(self, v):
+ self.query_qualities = qualitystring_to_array(v)
+ # NOTE(review): reference_length and reference_end define only a
+ # getter in this class, so the two setters below presumably always
+ # fail with an AttributeError - confirm.
+ property alen:
+ def __get__(self):
+ return self.reference_length
+ def __set__(self, v):
+ self.reference_length = v
+ property aend:
+ def __get__(self):
+ return self.reference_end
+ def __set__(self, v):
+ self.reference_end = v
+ property rlen:
+ def __get__(self):
+ return self.query_length
+ def __set__(self, v):
+ self.query_length = v
+ # NOTE(review): the query_alignment_* properties define only a
+ # getter in this class, so the setters of query, qqual, qstart,
+ # qend and qlen presumably always fail with an AttributeError -
+ # confirm.
+ property query:
+ def __get__(self):
+ return self.query_alignment_sequence
+ def __set__(self, v):
+ self.query_alignment_sequence = v
+ property qqual:
+ def __get__(self):
+ return array_to_qualitystring(self.query_alignment_qualities)
+ def __set__(self, v):
+ self.query_alignment_qualities = qualitystring_to_array(v)
+ property qstart:
+ def __get__(self):
+ return self.query_alignment_start
+ def __set__(self, v):
+ self.query_alignment_start = v
+ property qend:
+ def __get__(self):
+ return self.query_alignment_end
+ def __set__(self, v):
+ self.query_alignment_end = v
+ property qlen:
+ def __get__(self):
+ return self.query_alignment_length
+ def __set__(self, v):
+ self.query_alignment_length = v
+ # mrnm/mpos/rname/isize are further aliases of attributes that
+ # already have an alias above (rnext/pnext/tid/tlen)
+ property mrnm:
+ def __get__(self):
+ return self.next_reference_id
+ def __set__(self, v):
+ self.next_reference_id = v
+ property mpos:
+ def __get__(self):
+ return self.next_reference_start
+ def __set__(self, v):
+ self.next_reference_start = v
+ property rname:
+ def __get__(self):
+ return self.reference_id
+ def __set__(self, v):
+ self.reference_id = v
+ property isize:
+ def __get__(self):
+ return self.template_length
+ def __set__(self, v):
+ self.template_length = v
+ # read-only views onto methods, called with their default arguments
+ property blocks:
+ def __get__(self):
+ return self.get_blocks()
+ property aligned_pairs:
+ def __get__(self):
+ return self.get_aligned_pairs()
+ property inferred_length:
+ def __get__(self):
+ return self.infer_query_length()
+ property positions:
+ def __get__(self):
+ return self.get_reference_positions()
+ property tags:
+ def __get__(self):
+ return self.get_tags()
+ def __set__(self, tags):
+ self.set_tags(tags)
+ def overlap(self, start, end):
+     """compatibility name for :meth:`get_overlap`.
+
+     Returns the number of aligned bases of the read overlapping
+     the interval *start* and *end* on the reference sequence.
+     """
+     # get_overlap requires both interval bounds; calling it
+     # without arguments always raised a TypeError
+     return self.get_overlap(start, end)
+ def opt(self, tag):
+ # compatibility name for get_tag(); returns the value only
+ return self.get_tag(tag)
+ def setTag(self, tag, value, value_type=None, replace=True):
+ # compatibility name for set_tag(); all arguments are passed on
+ return self.set_tag(tag, value, value_type, replace)
+
+
+cdef class PileupColumn:
+ '''A pileup of reads at a particular reference sequence position
+ (:term:`column`). A pileup column contains all the reads that map
+ to a certain target base.
+
+ This class is a proxy for results returned by the samtools pileup
+ engine. If the underlying engine iterator advances, the results
+ of this column will change.
+
+ '''
+ def __init__(self):
+ raise TypeError("this class cannot be instantiated from Python")
+
+ def __str__(self):
+ # first line: reference id, position and number of reads,
+ # tab-separated; followed by one line per pileup read
+ return "\t".join(map(str,
+ (self.reference_id,
+ self.reference_pos,
+ self.nsegments))) +\
+ "\n" +\
+ "\n".join(map(str, self.pileups))
+
+ property reference_id:
+ '''the reference sequence number as defined in the header'''
+ def __get__(self):
+ return self.tid
+
+ property reference_name:
+ """:term:`reference` name (None if no AlignmentFile is associated)"""
+ def __get__(self):
+ if self._alignment_file is not None:
+ return self._alignment_file.getrname(self.tid)
+ return None
+
+ property nsegments:
+ '''number of reads mapping to this column.'''
+ def __get__(self):
+ return self.n_pu
+ def __set__(self, n):
+ self.n_pu = n
+
+ property reference_pos:
+ '''the position in the reference sequence (0-based).'''
+ def __get__(self):
+ return self.pos
+
+ property pileups:
+ '''list of reads (:class:`pysam.PileupRead`) aligned to this column'''
+ def __get__(self):
+ cdef int x
+ pileups = []
+
+ if self.plp == NULL or self.plp[0] == NULL:
+ raise ValueError("PileupColumn accessed after iterator finished")
+
+ # warning: there could be problems if self.n_pu and self.plp
+ # are out of sync.
+ for x from 0 <= x < self.n_pu:
+ pileups.append(makePileupRead(&(self.plp[0][x]), self._alignment_file))
+ return pileups
+
+ ########################################################
+ # Compatibility Accessors
+ # Functions, properties for compatibility with pysam < 0.8
+ ########################################################
+ # NOTE(review): reference_pos and reference_id define only a
+ # getter above, so the setters of pos and tid presumably always
+ # fail with an AttributeError - confirm.
+ property pos:
+ def __get__(self):
+ return self.reference_pos
+ def __set__(self, v):
+ self.reference_pos = v
+
+ property tid:
+ def __get__(self):
+ return self.reference_id
+ def __set__(self, v):
+ self.reference_id = v
+
+ property n:
+ def __get__(self):
+ return self.nsegments
+ def __set__(self, v):
+ self.nsegments = v
+
+
+cdef class PileupRead:
+ '''Representation of a read aligned to a particular position in the
+ reference sequence.
+
+ All properties of this class are read-only views onto the
+ underscore-prefixed fields of the object.
+
+ '''
+
+ def __init__(self):
+ raise TypeError(
+ "this class cannot be instantiated from Python")
+
+ def __str__(self):
+ # tab-separated: alignment, query position, indel, level and
+ # the four is_* flags
+ return "\t".join(
+ map(str,
+ (self.alignment, self.query_position,
+ self.indel, self.level,
+ self.is_del, self.is_head,
+ self.is_tail, self.is_refskip)))
+
+ property alignment:
+ """a :class:`pysam.AlignedSegment` object of the aligned read"""
+ def __get__(self):
+ return self._alignment
+
+ property query_position:
+ """position of the read base at the pileup site, 0-based.
+ None if is_del or is_refskip is set.
+
+ """
+ def __get__(self):
+ if self.is_del or self.is_refskip:
+ return None
+ else:
+ return self._qpos
+
+ property indel:
+ """indel length; 0 for no indel, positive for ins and negative for del"""
+ def __get__(self):
+ return self._indel
+
+ property level:
+ """the level of the read in the "viewer" mode"""
+ def __get__(self):
+ return self._level
+
+ property is_del:
+ """1 iff the base on the padded read is a deletion"""
+ def __get__(self):
+ return self._is_del
+
+ # NOTE(review): is_head, is_tail and is_refskip presumably mirror
+ # the fields of the same name in htslib's bam_pileup1_t - confirm
+ # where these fields are filled in.
+ property is_head:
+ def __get__(self):
+ return self._is_head
+
+ property is_tail:
+ def __get__(self):
+ return self._is_tail
+
+ property is_refskip:
+ def __get__(self):
+ return self._is_refskip
+
+# names exported by "from <module> import *"
+__all__ = [
+ "AlignedSegment",
+ "PileupColumn",
+ "PileupRead"]
diff --git a/pysam/calignmentfile.pxd b/pysam/calignmentfile.pxd
index b75c1fd..a7e956d 100644
--- a/pysam/calignmentfile.pxd
+++ b/pysam/calignmentfile.pxd
@@ -4,46 +4,19 @@ from libc.stdlib cimport malloc, calloc, realloc, free
from libc.string cimport memcpy, memcmp, strncpy, strlen, strdup
from libc.stdio cimport FILE, printf
-from cfaidx cimport faidx_t, Fastafile
-from chtslib cimport *
+from pysam.cfaidx cimport faidx_t, Fastafile
+from pysam.calignedsegment cimport AlignedSegment
+from pysam.chtslib cimport *
+
+from cpython cimport array
+cimport cython
cdef extern from *:
ctypedef char* const_char_ptr "const char*"
cdef extern from "htslib_util.h":
- int hts_set_verbosity(int verbosity)
- int hts_get_verbosity()
-
- # add *nbytes* into the variable length data of *src* at *pos*
- bam1_t * pysam_bam_update(bam1_t * b,
- size_t nbytes_old,
- size_t nbytes_new,
- uint8_t * pos)
-
- # now: static
- int aux_type2size(int)
-
char * pysam_bam_get_qname(bam1_t * b)
- uint32_t * pysam_bam_get_cigar(bam1_t * b)
- uint8_t * pysam_bam_get_seq(bam1_t * b)
- uint8_t * pysam_bam_get_qual(bam1_t * b)
- uint8_t * pysam_bam_get_aux(bam1_t * b)
- int pysam_bam_get_l_aux(bam1_t * b)
- char pysam_bam_seqi(uint8_t * s, int i)
-
- uint16_t pysam_get_bin(bam1_t * b)
- uint8_t pysam_get_qual(bam1_t * b)
- uint8_t pysam_get_l_qname(bam1_t * b)
- uint16_t pysam_get_flag(bam1_t * b)
- uint16_t pysam_get_n_cigar(bam1_t * b)
- void pysam_set_bin(bam1_t * b, uint16_t v)
- void pysam_set_qual(bam1_t * b, uint8_t v)
- void pysam_set_l_qname(bam1_t * b, uint8_t v)
- void pysam_set_flag(bam1_t * b, uint16_t v)
- void pysam_set_n_cigar(bam1_t * b, uint16_t v)
- void pysam_update_flag(bam1_t * b, uint16_t v, uint16_t flag)
-
cdef extern from "samfile_util.h":
@@ -62,24 +35,6 @@ ctypedef struct __iterdata:
char * seq
int seq_len
-# Exposing pysam extension classes
-#
-# Note: need to declare all C fields and methods here
-cdef class AlignedSegment:
-
- # object that this AlignedSegment represents
- cdef bam1_t * _delegate
-
- # add an alignment tag with value to the AlignedSegment
- # an existing tag of the same name will be replaced.
- cpdef set_tag(self, tag, value, value_type=?, replace=?)
-
- # add an alignment tag with value to the AlignedSegment
- # an existing tag of the same name will be replaced.
- cpdef get_tag(self, tag)
-
- # return true if tag exists
- cpdef has_tag(self, tag)
cdef class AlignmentFile:
@@ -108,15 +63,12 @@ cdef class AlignmentFile:
# beginning of read section
cdef int64_t start_offset
- cdef bam_hdr_t * _buildHeader(self, new_header)
cdef bam1_t * getCurrent(self)
cdef int cnext(self)
# write an aligned read
cpdef int write(self, AlignedSegment read) except -1
- cdef char * _getrname(self, int tid)
-
cdef class PileupColumn:
cdef bam_pileup1_t ** plp
cdef int tid
@@ -143,7 +95,7 @@ cdef class IteratorRow:
cdef class IteratorRowRegion(IteratorRow):
cdef hts_itr_t * iter
- cdef bam1_t * getCurrent( self )
+ cdef bam1_t * getCurrent(self)
cdef int cnext(self)
cdef class IteratorRowHead(IteratorRow):
@@ -153,7 +105,7 @@ cdef class IteratorRowHead(IteratorRow):
cdef int cnext(self)
cdef class IteratorRowAll(IteratorRow):
- cdef bam1_t * getCurrent( self )
+ cdef bam1_t * getCurrent(self)
cdef int cnext(self)
cdef class IteratorRowAllRefs(IteratorRow):
@@ -163,7 +115,7 @@ cdef class IteratorRowAllRefs(IteratorRow):
cdef class IteratorRowSelection(IteratorRow):
cdef int current_pos
cdef positions
- cdef bam1_t * getCurrent( self )
+ cdef bam1_t * getCurrent(self)
cdef int cnext(self)
cdef class IteratorColumn:
@@ -183,13 +135,13 @@ cdef class IteratorColumn:
cdef int max_depth
cdef int cnext(self)
- cdef char * getSequence( self )
+ cdef char * getSequence(self)
cdef setMask(self, mask)
cdef setupIteratorData(self,
int tid,
int start,
int end,
- int multiple_iterators = ?)
+ int multiple_iterators=?)
cdef reset(self, tid, start, end)
cdef _free_pileup_iter(self)
@@ -208,3 +160,4 @@ cdef class IndexedReads:
cdef index
cdef int owns_samfile
cdef bam_hdr_t * header
+
diff --git a/pysam/calignmentfile.pyx b/pysam/calignmentfile.pyx
index 533b0ff..57f2464 100644
--- a/pysam/calignmentfile.pyx
+++ b/pysam/calignmentfile.pyx
@@ -1,207 +1,82 @@
# cython: embedsignature=True
# cython: profile=True
-# adds doc-strings for sphinx
-import tempfile
+########################################################
+########################################################
+# Cython wrapper for SAM/BAM/CRAM files based on htslib
+########################################################
+# The principal classes defined in this module are:
+#
+# class AlignmentFile read/write access to SAM/BAM/CRAM formatted files
+#
+# class IndexedReads index a SAM/BAM/CRAM file by query name while keeping
+# the original sort order intact
+#
+# Additionally this module defines numerous additional classes that are part
+# of the internal API. These are:
+#
+# Various iterator classes to iterate over alignments in sequential (IteratorRow)
+# or in a stacked fashion (IteratorColumn):
+#
+# class IteratorRow
+# class IteratorRowRegion
+# class IteratorRowHead
+# class IteratorRowAll
+# class IteratorRowAllRefs
+# class IteratorRowSelection
+# class IteratorColumn
+# class IteratorColumnRegion
+# class IteratorColumnAllRefs
+#
+########################################################
+#
+# The MIT License
+#
+# Copyright (c) 2015 Andreas Heger
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+#
+########################################################
import os
-import sys
-import types
-import itertools
-import struct
-import ctypes
import collections
import re
-import platform
import warnings
import array
-from cpython cimport PyErr_SetString, \
- PyBytes_Check, \
- PyUnicode_Check, \
- PyBytes_FromStringAndSize
-
-from cpython cimport array
-
+from cpython cimport array as c_array
from cpython.version cimport PY_MAJOR_VERSION
-cimport cython
-
-########################################################################
-########################################################################
-########################################################################
-## Python 3 compatibility functions
-########################################################################
-IS_PYTHON3 = PY_MAJOR_VERSION >= 3
-cdef from_string_and_size(char* s, size_t length):
- if PY_MAJOR_VERSION < 3:
- return s[:length]
- else:
- return s[:length].decode("ascii")
-
-# filename encoding (copied from lxml.etree.pyx)
-cdef str _FILENAME_ENCODING
-_FILENAME_ENCODING = sys.getfilesystemencoding()
-if _FILENAME_ENCODING is None:
- _FILENAME_ENCODING = sys.getdefaultencoding()
-if _FILENAME_ENCODING is None:
- _FILENAME_ENCODING = 'ascii'
-
-#cdef char* _C_FILENAME_ENCODING
-#_C_FILENAME_ENCODING = <char*>_FILENAME_ENCODING
-
-cdef bytes _encodeFilename(object filename):
- """Make sure a filename is 8-bit encoded (or None)."""
- if filename is None:
- return None
- elif PyBytes_Check(filename):
- return filename
- elif PyUnicode_Check(filename):
- return filename.encode(_FILENAME_ENCODING)
- else:
- raise TypeError, u"Argument must be string or unicode."
-
-cdef bytes _forceBytes(object s):
- u"""convert string or unicode object to bytes, assuming
- ascii encoding.
- """
- if PY_MAJOR_VERSION < 3:
- return s
- elif s is None:
- return None
- elif PyBytes_Check(s):
- return s
- elif PyUnicode_Check(s):
- return s.encode('ascii')
- else:
- raise TypeError, u"Argument must be string, bytes or unicode."
-
-cdef inline bytes _forceCmdlineBytes(object s):
- return _forceBytes(s)
+from pysam.cutils cimport force_bytes, force_str, charptr_to_str
+from pysam.cutils cimport encode_filename, from_string_and_size
+from pysam.calignedsegment cimport makeAlignedSegment, makePileupColumn
-cdef _charptr_to_str(char* s):
- if PY_MAJOR_VERSION < 3:
- return s
- else:
- return s.decode("ascii")
-
-cdef _forceStr(object s):
- """Return s converted to str type of current Python
- (bytes in Py2, unicode in Py3)"""
- if s is None:
- return None
- if PY_MAJOR_VERSION < 3:
- return s
- elif PyBytes_Check(s):
- return s.decode('ascii')
- else:
- # assume unicode
- return s
+cimport cython
-########################################################################
-########################################################################
-########################################################################
+########################################################
## Constants and global variables
-########################################################################
# defines imported from samtools
DEF SEEK_SET = 0
DEF SEEK_CUR = 1
DEF SEEK_END = 2
-cdef char* CODE2CIGAR= "MIDNSHP=X"
-if IS_PYTHON3:
- CIGAR2CODE = dict( [y,x] for x,y in enumerate( CODE2CIGAR) )
-else:
- CIGAR2CODE = dict( [ord(y),x] for x,y in enumerate( CODE2CIGAR) )
-CIGAR_REGEX = re.compile( "(\d+)([MIDNSHP=X])" )
-
-#####################################################################
-# hard-coded constants
-cdef int max_pos = 2 << 29
-
-#####################################################################
-#####################################################################
-#####################################################################
-## private factory methods
-#####################################################################
-cdef class AlignedSegment
-cdef object makeAlignedSegment(bam1_t * src):
- '''enter src into AlignedSegment.'''
- # note that the following does not call __init__
- cdef AlignedSegment dest = AlignedSegment.__new__(AlignedSegment)
- dest._delegate = bam_dup1(src)
- return dest
-
-
-cdef class PileupColumn
-cdef makePileupColumn(bam_pileup1_t ** plp, int tid, int pos, int n_pu):
- # note that the following does not call __init__
- cdef PileupColumn dest = PileupColumn.__new__(PileupColumn)
- dest.plp = plp
- dest.tid = tid
- dest.pos = pos
- dest.n_pu = n_pu
- return dest
-
-cdef class PileupRead
-cdef makePileupRead(bam_pileup1_t * src):
- '''fill a PileupRead object from a bam_pileup1_t * object.'''
- cdef PileupRead dest = PileupRead.__new__(PileupRead)
- dest._alignment = makeAlignedSegment(src.b)
- dest._qpos = src.qpos
- dest._indel = src.indel
- dest._level = src.level
- dest._is_del = src.is_del
- dest._is_head = src.is_head
- dest._is_tail = src.is_tail
- dest._is_refskip = src.is_refskip
- return dest
-
-cdef convertBinaryTagToList( uint8_t * s ):
- """return bytesize, number of values list of values in s."""
- cdef char auxtype
- cdef uint8_t byte_size
- cdef int32_t nvalues
-
- # get byte size
- auxtype = s[0]
- byte_size = aux_type2size( auxtype )
- s += 1
- # get number of values in array
- nvalues = (<int32_t*>s)[0]
- s += 4
- # get values
- values = []
- if auxtype == 'c':
- for x from 0 <= x < nvalues:
- values.append((<int8_t*>s)[0])
- s += 1
- elif auxtype == 'C':
- for x from 0 <= x < nvalues:
- values.append((<uint8_t*>s)[0])
- s += 1
- elif auxtype == 's':
- for x from 0 <= x < nvalues:
- values.append((<int16_t*>s)[0])
- s += 2
- elif auxtype == 'S':
- for x from 0 <= x < nvalues:
- values.append((<uint16_t*>s)[0])
- s += 2
- elif auxtype == 'i':
- for x from 0 <= x < nvalues:
- values.append((<int32_t*>s)[0])
- s += 4
- elif auxtype == 'I':
- for x from 0 <= x < nvalues:
- values.append((<uint32_t*>s)[0])
- s += 4
- elif auxtype == 'f':
- for x from 0 <= x < nvalues:
- values.append((<float*>s)[0])
- s += 4
-
- return byte_size, nvalues, values
-
+# maximum genomic coordinate
+cdef int MAX_POS = 2 << 29
# valid types for SAM headers
VALID_HEADER_TYPES = {"HD" : dict,
@@ -214,13 +89,14 @@ VALID_HEADER_TYPES = {"HD" : dict,
VALID_HEADERS = ("HD", "SQ", "RG", "PG", "CO")
# default type conversions within SAM header records
-VALID_HEADER_FIELDS = {"HD" : {"VN" : str, "SO" : str, "GO" : str},
+KNOWN_HEADER_FIELDS = {"HD" : {"VN" : str, "SO" : str, "GO" : str},
"SQ" : {"SN" : str, "LN" : int, "AS" : str,
"M5" : str, "SP" : str, "UR" : str,},
"RG" : {"ID" : str, "CN" : str, "DS" : str,
"DT" : str, "FO" : str, "KS" : str,
"LB" : str, "PG" : str, "PI" : str,
- "PL" : str, "PU" : str, "SM" : str,},
+ "PL" : str, "PM" : str, "PU" : str,
+ "SM" : str,},
"PG" : {"ID" : str, "PN" : str, "CL" : str,
"PP" : str, "DS" : str, "VN" : str,},}
@@ -235,94 +111,250 @@ VALID_HEADER_ORDER = {"HD" : ("VN", "SO", "GO"),
"PP"),}
-cdef class AlignmentFile:
- '''*(filename, mode=None, template = None,
- reference_names=None, reference_lengths = None,
- text=NULL, header=None,
- add_sq_text=False, check_header=True,
- check_sq=True)*
+def build_header_line(fields, record):
+ '''build a header line from `fields` dictionary for `record`'''
+
+ # TODO: add checking for field and sort order
+ line = ["@%s" % record]
+ # comment
+ if record == "CO":
+ line.append(fields)
+ # user tags
+ elif record.islower():
+ for key in sorted(fields):
+ line.append("%s:%s" % (key, str(fields[key])))
+ # defined tags
+ else:
+ # write fields of the specification
+ for key in VALID_HEADER_ORDER[record]:
+ if key in fields:
+ line.append("%s:%s" % (key, str(fields[key])))
+ # write user fields
+ for key in fields:
+ if not key.isupper():
+ line.append("%s:%s" % (key, str(fields[key])))
+
+ return "\t".join(line)
+
+cdef bam_hdr_t * build_header(new_header):
+ '''return a new header built from a dictionary in `new_header`.
+
+ This method inserts the text field, target_name and target_len.
+ '''
+
+ lines = []
+
+ # check if hash exists
+
+ # create new header and copy old data
+ cdef bam_hdr_t * dest
- A :term:`SAM`/:term:`BAM` formatted file. The file is
- automatically opened.
+ dest = bam_hdr_init()
- *mode* should be ``r`` for reading or ``w`` for writing. The
- default is text mode (:term:`SAM`). For binary (:term:`BAM`) I/O
- you should append ``b`` for compressed or ``u`` for uncompressed
- :term:`BAM` output. Use ``h`` to output header information in
- text (:term:`TAM`) mode.
+ # first: defined tags
+ for record in VALID_HEADERS:
+ if record in new_header:
+ ttype = VALID_HEADER_TYPES[record]
+ data = new_header[record]
+ if type(data) != type(ttype()):
+ raise ValueError(
+ "invalid type for record %s: %s, expected %s" %
+ (record, type(data), type(ttype())))
+ if type(data) is dict:
+ lines.append(build_header_line(data, record))
+ else:
+ for fields in new_header[record]:
+ lines.append(build_header_line(fields, record))
+
+ # then: user tags (lower case), sorted alphabetically
+ for record, data in sorted(new_header.items()):
+ if record in VALID_HEADERS: continue
+ if type(data) is dict:
+ lines.append(build_header_line(data, record))
+ else:
+ for fields in new_header[record]:
+ lines.append(build_header_line(fields, record))
+
+ text = "\n".join(lines) + "\n"
+ if dest.text != NULL: free( dest.text )
+ dest.text = <char*>calloc(len(text), sizeof(char))
+ dest.l_text = len(text)
+ cdef bytes btext = text.encode('ascii')
+ strncpy(dest.text, btext, dest.l_text)
+
+ cdef bytes bseqname
+ # collect targets
+ if "SQ" in new_header:
+ seqs = []
+ for fields in new_header["SQ"]:
+ try:
+ seqs.append( (fields["SN"], fields["LN"] ) )
+ except KeyError:
+ raise KeyError( "incomplete sequence information in '%s'" % str(fields))
+
+ dest.n_targets = len(seqs)
+ dest.target_name = <char**>calloc(dest.n_targets, sizeof(char*))
+ dest.target_len = <uint32_t*>calloc(dest.n_targets, sizeof(uint32_t))
+
+ for x from 0 <= x < dest.n_targets:
+ seqname, seqlen = seqs[x]
+ dest.target_name[x] = <char*>calloc(
+ len(seqname) + 1, sizeof(char))
+ bseqname = seqname.encode('ascii')
+ strncpy(dest.target_name[x], bseqname,
+ len(seqname) + 1)
+ dest.target_len[x] = seqlen
+
+ return dest
- If ``b`` is present, it must immediately follow ``r`` or ``w``.
- Valid modes are ``r``, ``w``, ``wh``, ``rb``, ``wb``, ``wbu`` and
- ``wb0``. For instance, to open a :term:`BAM` formatted file for
- reading, type::
- f = pysam.AlignmentFile('ex1.bam','rb')
+cdef class AlignmentFile:
+ """
+ AlignmentFile(filepath_or_object, mode=None, template=None,
+ reference_names=None, reference_lengths=None, text=NULL,
+ header=None, add_sq_text=False, check_header=True, check_sq=True)
- If mode is not specified, we will try to auto-detect in the order
- 'rb', 'r', thus both the following should work::
+ A :term:`SAM`/:term:`BAM` formatted file.
- f1 = pysam.AlignmentFile('ex1.bam')
- f2 = pysam.AlignmentFile('ex1.sam')
+ If `filepath_or_object` is a string, the file is automatically
+ opened. If `filepath_or_object` is a python File object, the
+ already opened file will be used.
- If an index for a BAM file exists (.bai), it will be opened
- automatically. Without an index random access to reads via
- :meth:`fetch` and :meth:`pileup` is disabled.
+ If the file is opened for reading and an index for a BAM file exists
+ (.bai), it will be opened automatically. Without an index random
+ access via :meth:`~pysam.AlignmentFile.fetch` and
+ :meth:`~pysam.AlignmentFile.pileup` is disabled.
For writing, the header of a :term:`SAM` file/:term:`BAM` file can
be constituted from several sources (see also the samtools format
specification):
- 1. If *template* is given, the header is copied from a another
- *AlignmentFile* (*template* must be of type *AlignmentFile*).
+ 1. If `template` is given, the header is copied from another
+ `AlignmentFile` (`template` must be a
+ :class:`~pysam.AlignmentFile`).
- 2. If *header* is given, the header is built from a
- multi-level dictionary. The first level are the four types
- ('HD', 'SQ', ...). The second level are a list of lines,
- with each line being a list of tag-value pairs. The header
- is constructed first from all the defined fields, followed
- by user tags in alphabetical order.
+ 2. If `header` is given, the header is built from a
+ multi-level dictionary.
- 3. If *text* is given, new header text is copied from raw
+ 3. If `text` is given, new header text is copied from raw
text.
- 4. The names (*reference_names*) and lengths
- (*reference_lengths*) are supplied directly as lists. By
- default, 'SQ' and 'LN' tags will be added to the header
- text. This option can be changed by unsetting the flag
- *add_sq_text*.
+ 4. The names (`reference_names`) and lengths
+ (`reference_lengths`) are supplied directly as lists.
For writing a CRAM file, the filename of the reference can be
- added through a fasta formatted file (*reference_filename*)
+ added through a fasta formatted file (`reference_filename`)
By default, if a file is opened in mode 'r', it is checked
- for a valid header (*check_header* = True) and a definition of
- chromosome names (*check_sq* = True).
+ for a valid header (`check_header` = True) and a definition of
+ chromosome names (`check_sq` = True).
- '''
+ Parameters
+ ----------
+ mode : string
+ `mode` should be ``r`` for reading or ``w`` for writing. The
+ default is text mode (:term:`SAM`). For binary (:term:`BAM`) I/O
+ you should append ``b`` for compressed or ``u`` for uncompressed
+ :term:`BAM` output. Use ``h`` to output header information in
+ text (:term:`TAM`) mode.
+
+ If ``b`` is present, it must immediately follow ``r`` or ``w``.
+ Valid modes are ``r``, ``w``, ``wh``, ``rb``, ``wb``, ``wbu`` and
+ ``wb0``. For instance, to open a :term:`BAM` formatted file for
+ reading, type::
+
+ f = pysam.AlignmentFile('ex1.bam','rb')
+
+ If mode is not specified, the method will try to auto-detect
+ in the order 'rb', 'r', thus both the following should work::
+
+ f1 = pysam.AlignmentFile('ex1.bam')
+ f2 = pysam.AlignmentFile('ex1.sam')
+
+ template : AlignmentFile
+ when writing, copy header from `template`.
+
+ header : dict
+ when writing, build header from a multi-level dictionary. The
+ first level are the four types ('HD', 'SQ', ...). The
+ second level are a list of lines, with each line being a
+ list of tag-value pairs. The header is constructed first
+ from all the defined fields, followed by user tags in
+ alphabetical order.
+
+ text : string
+ when writing, use the string provided as the header
+
+ reference_names : list
+ see reference_lengths
+
+ reference_lengths : list
+ when writing, build header from list of chromosome names and lengths.
+ By default, 'SQ' and 'LN' tags will be added to the header
+ text. This option can be changed by unsetting the flag
+ `add_sq_text`.
+
+ add_sq_text : bool
+ do not add 'SQ' and 'LN' tags to header. This option permits construction of
+ :term:`SAM` formatted files without a header.
+
+ check_header : bool
+ when reading, check if header is present (default=True)
+
+ check_sq : bool
+ when reading, check if SQ entries are present in header (default=True)
+
+ """
+
+ def __cinit__(self, *args, **kwargs):
- def __cinit__(self, *args, **kwargs ):
self.htsfile = NULL
self._filename = None
self.is_bam = False
self.is_stream = False
self.is_cram = False
self.is_remote = False
-
+
self._open(*args, **kwargs)
# allocate memory for iterator
self.b = <bam1_t*>calloc(1, sizeof(bam1_t))
- def _isOpen(self):
+ def is_open(self):
'''return true if htsfile has been opened.'''
return self.htsfile != NULL
- def _hasIndex(self):
- '''return true if htsfile has an existing (and opened) index.'''
+ def has_index(self):
+ """return true if htsfile has an existing (and opened) index.
+ """
return self.index != NULL
+ def check_index(self):
+ """return True if index is present.
+
+ Raises
+ ------
+
+ AttributeError
+ if htsfile is :term:`SAM` formatted and thus has no index.
+
+ ValueError
+ if htsfile is closed or index could not be opened.
+ """
+
+ if not self.is_open():
+ raise ValueError("I/O operation on closed file")
+ if not self.is_bam and not self.is_cram:
+ raise AttributeError(
+ "AlignmentFile.mapped only available in bam files")
+ if self.index == NULL:
+ raise ValueError(
+ "mapping information not recorded in index "
+ "or index not available")
+ return True
+
def _open(self,
- filename,
+ filepath_or_object,
mode=None,
AlignmentFile template=None,
reference_names=None,
@@ -341,6 +373,9 @@ cdef class AlignmentFile:
If _open is called on an existing file, the current file
will be closed and a new file will be opened.
'''
+ cdef char *cfilename
+ cdef char *cmode
+
# for backwards compatibility:
if referencenames is not None:
reference_names = referencenames
@@ -350,7 +385,8 @@ cdef class AlignmentFile:
# read mode autodetection
if mode is None:
try:
- self._open(filename, 'rb',
+ self._open(filepath_or_object,
+ 'rb',
template=template,
reference_names=reference_names,
reference_lengths=reference_lengths,
@@ -364,7 +400,8 @@ cdef class AlignmentFile:
except ValueError, msg:
pass
- self._open(filename, 'r',
+ self._open(filepath_or_object,
+ 'r',
template=template,
reference_names=reference_names,
reference_lengths=reference_lengths,
@@ -376,7 +413,7 @@ cdef class AlignmentFile:
check_sq=check_sq)
return
- assert mode in ("r","w","rb","wb", "wh",
+ assert mode in ("r", "w", "rb", "wb", "wh",
"wbu", "rU", "wb0",
"rc", "wc"), \
"invalid file opening mode `%s`" % mode
@@ -385,12 +422,20 @@ cdef class AlignmentFile:
if self.htsfile != NULL:
self.close()
+ # check if we are working with a File object
+ if hasattr(filepath_or_object, "fileno"):
+ filename = filepath_or_object.name
+ if filepath_or_object.closed:
+ raise ValueError('I/O operation on closed file')
+ else:
+ filename = filepath_or_object
+
# for htslib, wbu seems to not work
if mode == "wbu":
mode = "wb0"
cdef bytes bmode = mode.encode('ascii')
- self._filename = filename = _encodeFilename(filename)
+ self._filename = filename = encode_filename(filename)
# FIXME: Use htsFormat when it is available
self.is_bam = len(mode) > 1 and mode[1] == 'b'
@@ -400,6 +445,7 @@ cdef class AlignmentFile:
filename.startswith(b"ftp:")
cdef char * ctext
+ cdef hFILE * fp
ctext = NULL
if mode[0] == 'w':
@@ -409,7 +455,7 @@ cdef class AlignmentFile:
if template:
self.header = bam_hdr_dup(template.header)
elif header:
- self.header = self._buildHeader(header)
+ self.header = build_header(header)
else:
# build header from a target names and lengths
assert reference_names and reference_lengths, \
@@ -420,7 +466,7 @@ cdef class AlignmentFile:
"unequal names and lengths of reference sequences"
# allocate and fill header
- reference_names = [_forceBytes(ref) for ref in reference_names]
+ reference_names = [force_bytes(ref) for ref in reference_names]
self.header = bam_hdr_init()
self.header.n_targets = len(reference_names)
n = 0
@@ -443,13 +489,13 @@ cdef class AlignmentFile:
text = []
for x from 0 <= x < self.header.n_targets:
text.append("@SQ\tSN:%s\tLN:%s\n" % \
- (_forceStr(reference_names[x]),
+ (force_str(reference_names[x]),
reference_lengths[x]))
text = ''.join(text)
if text is not None:
# copy without \0
- text = _forceBytes(text)
+ text = force_bytes(text)
ctext = text
self.header.l_text = strlen(ctext)
self.header.text = <char*>calloc(
@@ -457,7 +503,14 @@ cdef class AlignmentFile:
memcpy(self.header.text, ctext, strlen(ctext))
# open file (hts_open is synonym with sam_open)
- self.htsfile = hts_open(filename, bmode)
+ cfilename, cmode = filename, bmode
+ if hasattr(filepath_or_object, "fileno"):
+ fp = hdopen(filepath_or_object.fileno(), cmode)
+ with nogil:
+ self.htsfile = hts_hopen(fp, cfilename, cmode)
+ else:
+ with nogil:
+ self.htsfile = hts_open(cfilename, cmode)
# set filename with reference sequences. If no filename
# is given, the CRAM reference arrays will be built from
@@ -465,12 +518,13 @@ cdef class AlignmentFile:
if self.is_cram and reference_filename:
# note that fn_aux takes ownership, so create
# a copy
- fn = _encodeFilename(reference_filename)
+ fn = encode_filename(reference_filename)
self.htsfile.fn_aux = strdup(fn)
# write header to htsfile
if self.is_bam or self.is_cram or "h" in mode:
- sam_hdr_write(self.htsfile, self.header)
+ with nogil:
+ sam_hdr_write(self.htsfile, self.header)
elif mode[0] == "r":
# open file for reading
@@ -480,7 +534,15 @@ cdef class AlignmentFile:
raise IOError("file `%s` not found" % filename)
# open file (hts_open is synonym with sam_open)
- self.htsfile = hts_open(filename, bmode)
+ cfilename, cmode = filename, bmode
+ if hasattr(filepath_or_object, "fileno"):
+ fp = hdopen(filepath_or_object.fileno(), cmode)
+ with nogil:
+ self.htsfile = hts_hopen(fp, cfilename, cmode)
+ else:
+ with nogil:
+ self.htsfile = hts_open(cfilename, cmode)
+
if self.htsfile == NULL:
raise ValueError(
"could not open file (mode='%s') - "
@@ -488,7 +550,8 @@ cdef class AlignmentFile:
# bam files require a valid header
if self.is_bam or self.is_cram:
- self.header = sam_hdr_read(self.htsfile)
+ with nogil:
+ self.header = sam_hdr_read(self.htsfile)
if self.header == NULL:
raise ValueError(
"file does not have valid header (mode='%s') "
@@ -497,7 +560,8 @@ cdef class AlignmentFile:
# in sam files it is optional (htsfile full of
# unmapped reads)
if check_header:
- self.header = sam_hdr_read(self.htsfile)
+ with nogil:
+ self.header = sam_hdr_read(self.htsfile)
if self.header == NULL:
raise ValueError(
"file does not have valid header (mode='%s') "
@@ -525,7 +589,9 @@ cdef class AlignmentFile:
# open index for remote files
if self.is_remote:
- self.index = hts_idx_load(filename, format_index)
+ cfilename = filename
+ with nogil:
+ self.index = hts_idx_load(cfilename, format_index)
if self.index == NULL:
warnings.warn(
"unable to open remote index for '%s'" % filename)
@@ -541,8 +607,10 @@ cdef class AlignmentFile:
else:
# returns NULL if there is no index or index could
# not be opened
- self.index = sam_index_load(self.htsfile,
- filename)
+ cfilename = filename
+ with nogil:
+ self.index = sam_index_load(self.htsfile,
+ cfilename)
if self.index == NULL:
raise IOError(
"error while opening index for '%s'" %
@@ -552,62 +620,135 @@ cdef class AlignmentFile:
if not self.is_stream:
self.start_offset = self.tell()
- def gettid(self, reference):
- '''
- convert :term:`reference` name into numerical :term:`tid`
+ def get_tid(self, reference):
+ """
+ return the numerical :term:`tid` corresponding to
+ :term:`reference`
returns -1 if reference is not known.
- '''
- if not self._isOpen():
+ """
+ if not self.is_open():
raise ValueError("I/O operation on closed file")
- reference = _forceBytes(reference)
+ reference = force_bytes(reference)
return bam_name2id(self.header, reference)
- def getrname(self, tid):
- '''
- convert numerical :term:`tid` into :term:`reference` name.'''
- if not self._isOpen():
+ def get_reference_name(self, tid):
+ """
+ return :term:`reference` name corresponding to numerical :term:`tid`
+ """
+ if not self.is_open():
raise ValueError("I/O operation on closed file")
if not 0 <= tid < self.header.n_targets:
raise ValueError("reference_id %i out of range 0<=tid<%i" %
(tid, self.header.n_targets))
- return _charptr_to_str(self.header.target_name[tid])
+ return charptr_to_str(self.header.target_name[tid])
- cdef char * _getrname(self, int tid): # TODO unused
- '''
- convert numerical :term:`tid` into :term:`reference` name.'''
- if not self._isOpen():
+ def reset(self):
+ """reset file position to beginning of file just after
+ the header.
+
+ Returns
+ -------
+
+ The file position after moving the file pointer.
+
+ """
+ return self.seek(self.start_offset, 0)
+
+ def seek(self, uint64_t offset, int where=0):
+ """move file pointer to position `offset`, see
+ :meth:`pysam.AlignmentFile.tell`.
+
+ Parameters
+ ----------
+
+ offset : int
+
+ position of the read/write pointer within the file.
+
+ where : int
+
+ optional and defaults to 0 which means absolute file
+ positioning, other values are 1 which means seek relative to
+ the current position and 2 means seek relative to the file's
+ end.
+
+ Returns
+ -------
+
+ the file position after moving the file pointer
+
+ """
+
+ if not self.is_open():
raise ValueError("I/O operation on closed file")
+ if not self.is_bam:
+ raise NotImplementedError(
+ "seek only available in bam files")
+ if self.is_stream:
+ raise OSError("seek no available in streams")
- if not 0 <= tid < self.header.n_targets:
- raise ValueError("tid %i out of range 0<=tid<%i" %
- (tid, self.header.n_targets ))
- return self.header.target_name[tid]
+ cdef uint64_t pos
+ with nogil:
+ pos = bgzf_seek(hts_get_bgzfp(self.htsfile), offset, where)
+ return pos
+
+ def tell(self):
+ """
+ return current file position.
+ """
+ if not self.is_open():
+ raise ValueError("I/O operation on closed file")
+ if not (self.is_bam or self.is_cram):
+ raise NotImplementedError(
+ "seek only available in bam files")
- def _parseRegion(self,
+ cdef uint64_t pos
+ with nogil:
+ pos = bgzf_tell(hts_get_bgzfp(self.htsfile))
+ return pos
+
+ def parse_region(self,
reference=None,
start=None,
end=None,
region=None,
tid=None):
- '''parse region information.
-
- Raises ValueError for invalid regions.
+ """parse alternative ways to specify a genomic region. A region can
+ either be specified by :term:`reference`, `start` and
+ `end`. `start` and `end` denote 0-based, half-open
+ intervals.
- Returns a tuple of a flag, :term:`tid`, start and end. The
- flag indicates whether some coordinates were supplied.
+ Alternatively, a samtools :term:`region` string can be
+ supplied.
+
+ If any of the coordinates are missing they will be replaced by the
+ minimum (`start`) or maximum (`end`) coordinate.
- Note that region strings are 1-based, while *start* and *end* denote
+ Note that region strings are 1-based, while `start` and `end` denote
an interval in python coordinates.
- '''
+ Returns
+ -------
+
+ tuple : a tuple of `flag`, :term:`tid`, `start` and `end`. The
+ flag indicates whether no coordinates were supplied and the
+ genomic region is the complete genomic space.
+
+ Raises
+ ------
+
+ ValueError
+ for invalid or out of bounds regions.
+
+ """
cdef int rtid
cdef long long rstart
cdef long long rend
rtid = -1
rstart = 0
- rend = max_pos
+ rend = MAX_POS
if start != None:
try:
rstart = start
@@ -621,7 +762,7 @@ cdef class AlignmentFile:
raise ValueError('end out of range (%i)' % end)
if region:
- region = _forceStr(region)
+ region = force_str(region)
parts = re.split("[:-]", region)
reference = parts[0]
if len(parts) >= 2:
@@ -643,92 +784,85 @@ cdef class AlignmentFile:
if rstart > rend:
raise ValueError(
'invalid coordinates: start (%i) > end (%i)' % (rstart, rend))
- if not 0 <= rstart < max_pos:
+ if not 0 <= rstart < MAX_POS:
raise ValueError('start out of range (%i)' % rstart)
- if not 0 <= rend <= max_pos:
+ if not 0 <= rend <= MAX_POS:
raise ValueError('end out of range (%i)' % rend)
return 1, rtid, rstart, rend
- def reset(self):
- '''reset file position to beginning of file just after
- the header.'''
- return self.seek(self.start_offset, 0)
-
- def seek(self, uint64_t offset, int where = 0):
- '''move file pointer to position *offset*, see
- :meth:`pysam.AlignmentFile.tell`.
- '''
-
- if not self._isOpen():
- raise ValueError("I/O operation on closed file")
- if not self.is_bam:
- raise NotImplementedError(
- "seek only available in bam files")
- if self.is_stream:
- raise OSError("seek no available in streams")
-
- return bgzf_seek(hts_get_bgzfp(self.htsfile), offset, where)
-
- def tell(self):
- '''
- return current file position.
- '''
- if not self._isOpen():
- raise ValueError("I/O operation on closed file")
- if not (self.is_bam or self.is_cram):
- raise NotImplementedError(
- "seek only available in bam files")
-
- return bgzf_tell(hts_get_bgzfp(self.htsfile))
-
def fetch(self,
reference=None,
start=None,
end=None,
region=None,
tid=None,
- callback=None,
until_eof=False,
multiple_iterators=False):
- '''fetch aligned, i.e. mapped, reads in a :term:`region`
- using 0-based
- indexing. The region is specified by :term:`reference`,
- *start* and *end*. Alternatively, a samtools :term:`region`
- string can be supplied.
-
- Without *reference* or *region* all mapped reads will be
- fetched. The reads will be returned ordered by reference
- sequence, which will not necessarily be the order within the
- file.
+ """fetch reads aligned in a :term:`region`.
- If *until_eof* is given, all reads from the current file
- position will be returned in order as they are within the
- file. Using this option will also fetch unmapped reads.
+ See :meth:`AlignmentFile.parse_region` for more information
+ on genomic regions.
- Set *multiple_iterators* to true if you will be using multiple
- iterators on the same file at the same time. The iterator
- returned will receive its own copy of a filehandle to the file
- effectively re-opening the file. Re-opening a file creates
- some overhead, so beware.
+ Without a `reference` or `region` all mapped reads in the file
+ will be fetched. The reads will be returned ordered by reference
+ sequence, which will not necessarily be the order within the
+ file. This mode of iteration still requires an index. If there is
+ no index, use `until_eof=True`.
- If only *reference* is set, all reads aligned to *reference*
+ If only `reference` is set, all reads aligned to `reference`
will be fetched.
Note that a :term:`SAM` file does not allow random access. If
- *region* or *reference* are given, an exception is raised.
+ `region` or `reference` are given, an exception is raised.
- '''
+ :class:`~pysam.FastaFile`
+ :class:`~pysam.IteratorRow`
+ :class:`~pysam.IteratorRow`
+ :class:`~IteratorRow`
+ :class:`IteratorRow`
+
+ Parameters
+ ----------
+
+ until_eof : bool
+
+ If `until_eof` is True, all reads from the current file
+ position will be returned in order as they are within the
+ file. Using this option will also fetch unmapped reads.
+
+ multiple_iterators : bool
+
+ If `multiple_iterators` is True (the default is False), multiple
+ iterators on the same file can be used at the same time. The
+ iterator returned will receive its own copy of a filehandle to
+ the file effectively re-opening the file. Re-opening a file
+ creates some overhead, so beware.
+
+ Returns
+ -------
+
+ An iterator over a collection of reads.
+
+ Raises
+ ------
+
+ ValueError
+ if the genomic coordinates are out of range or invalid or the
+ file does not permit random access to genomic coordinates.
+
+ """
cdef int rtid, rstart, rend, has_coord
- if not self._isOpen():
+ if not self.is_open():
raise ValueError( "I/O operation on closed file" )
- has_coord, rtid, rstart, rend = self._parseRegion(reference,
- start,
- end,
- region,
- tid)
+ has_coord, rtid, rstart, rend = self.parse_region(
+ reference,
+ start,
+ end,
+ region,
+ tid)
# Turn of re-opening if htsfile is a stream
if self.is_stream:
@@ -736,7 +870,7 @@ cdef class AlignmentFile:
if self.is_bam or self.is_cram:
if not until_eof and not self.is_remote:
- if not self._hasIndex():
+ if not self.has_index():
raise ValueError(
"fetch called on bamfile without index")
@@ -760,10 +894,6 @@ cdef class AlignmentFile:
raise ValueError(
"fetching by region is not available for sam files")
- if callback:
- raise NotImplementedError(
- "callback not implemented yet")
-
if self.header == NULL:
raise ValueError(
"fetch called for htsfile without header")
@@ -777,22 +907,29 @@ cdef class AlignmentFile:
multiple_iterators=multiple_iterators)
def head(self, n, multiple_iterators=True):
- '''return iterator over the first n alignments.
+ '''return an iterator over the first n alignments.
- This is useful for inspecting the bam-file.
+ This iterator is useful for inspecting the bam-file.
- *multiple_iterators* is set to True by default in order to
- avoid changing the current file position.
+ Parameters
+ ----------
+
+ multiple_iterators : bool
+
+ is set to True by default in order to
+ avoid changing the current file position.
+
+ Returns
+ -------
+
+ an iterator over a collection of reads
+
'''
return IteratorRowHead(self, n,
multiple_iterators=multiple_iterators)
- def mate(self,
- AlignedSegment read):
- '''return the mate of :class:`AlignedSegment` *read*.
-
- Throws a ValueError if read is unpaired or the mate
- is unmapped.
+ def mate(self, AlignedSegment read):
+ '''return the mate of :class:`~pysam.AlignedSegment` `read`.
.. note::
@@ -806,6 +943,17 @@ cdef class AlignmentFile:
If a read needs to be processed with its mate, work
from a read name sorted file or, better, cache reads.
+ Returns
+ -------
+
+ :class:`~pysam.AlignedSegment` : the mate
+
+ Raises
+ ------
+
+ ValueError
+ if the read is unpaired or the mate is unmapped
+
'''
cdef uint32_t flag = read._delegate.core.flag
@@ -836,69 +984,40 @@ cdef class AlignmentFile:
break
else:
raise ValueError("mate not found")
-
- return mate
-
- def count(self,
- reference=None,
- start=None,
- end=None,
- region=None,
- until_eof=False):
- '''*(reference = None, start = None, end = None,
- region = None, callback = None, until_eof = False)*
-
- count reads :term:`region` using 0-based indexing. The region
- is specified by :term:`reference`, *start* and
- *end*. Alternatively, a samtools :term:`region` string can be
- supplied.
-
- Note that a :term:`SAM` file does not allow random access. If
- *region* or *reference* are given, an exception is raised.
- '''
- cdef AlignedSegment read
- cdef long counter = 0
-
- if not self._isOpen():
- raise ValueError( "I/O operation on closed file" )
-
- for read in self.fetch(reference=reference,
- start=start,
- end=end,
- region=region,
- until_eof=until_eof):
- counter += 1
- return counter
+ return mate
- def pileup( self,
- reference = None,
- start = None,
- end = None,
- region = None,
- **kwargs ):
- '''perform a :term:`pileup` within a :term:`region`. The region is
- specified by :term:`reference`, *start* and *end* (using
- 0-based indexing). Alternatively, a samtools *region* string
+ def pileup(self,
+ reference=None,
+ start=None,
+ end=None,
+ region=None,
+ **kwargs):
+ """perform a :term:`pileup` within a :term:`region`. The region is
+ specified by :term:`reference`, 'start' and 'end' (using
+ 0-based indexing). Alternatively, a samtools 'region' string
can be supplied.
- Without *reference* or *region* all reads will be used for the
+ Without 'reference' or 'region' all reads will be used for the
pileup. The reads will be returned ordered by
:term:`reference` sequence, which will not necessarily be the
order within the file.
- The method returns an iterator of type
- :class:`pysam.IteratorColumn` unless a *callback is
- provided. If a *callback* is given, the callback will be
- executed for each column within the :term:`region`.
-
Note that :term:`SAM` formatted files do not allow random
- access. In these files, if a *region* or *reference* are
+ access. In these files, if a 'region' or 'reference' are
given an exception is raised.
- Optional *kwargs* to the iterator:
+ .. note::
+
+ 'all' reads which overlap the region are returned. The
+ first base returned will be the first base of the first
+ read 'not' necessarily the first base of the region used
+ in the query.
- stepper
+ Parameters
+ ----------
+
+ stepper : string
The stepper controlls how the iterator advances.
Possible options for the stepper are
@@ -908,120 +1027,223 @@ cdef class AlignmentFile:
``nofilter``
uses every single read
-
``samtools``
same filter and read processing as in :term:`csamtools`
- pileup. This requires a *fastafile* to be given.
+ pileup. This requires a 'fastafile' to be given.
- fastafile
- A :class:`~pysam.FastaFile` object. This is required for
- some of the steppers.
+ fastafile : :class:`~pysam.FastaFile` object.
- mask
- Skip all reads with bits set in mask if mask=True.
+ This is required for some of the steppers.
- max_depth
- Maximum read depth permitted. The default limit is *8000*.
+ max_depth : int
+ Maximum read depth permitted. The default limit is '8000'.
- truncate
+ truncate : bool
By default, the samtools pileup engine outputs all reads
- overlapping a region (see note below). If truncate is True
- and a region is given, only output columns in the exact
- region specificied.
+ overlapping a region. If truncate is True and a region is
+ given, only columns in the exact region specified are
+ returned.
- .. note::
+ Returns
+ -------
- *all* reads which overlap the region are returned. The
- first base returned will be the first base of the first
- read *not* necessarily the first base of the region used
- in the query.
+ an iterator over genomic positions.
- '''
+ """
cdef int rtid, rstart, rend, has_coord
- if not self._isOpen():
- raise ValueError( "I/O operation on closed file" )
+ if not self.is_open():
+ raise ValueError("I/O operation on closed file")
- has_coord, rtid, rstart, rend = self._parseRegion(
- reference, start, end, region )
+ has_coord, rtid, rstart, rend = self.parse_region(
+ reference, start, end, region)
if self.is_bam or self.is_cram:
- if not self._hasIndex():
+ if not self.has_index():
raise ValueError("no index available for pileup")
if has_coord:
return IteratorColumnRegion(self,
- tid = rtid,
- start = rstart,
- end = rend,
+ tid=rtid,
+ start=rstart,
+ end=rend,
**kwargs )
else:
return IteratorColumnAllRefs(self, **kwargs )
else:
- raise NotImplementedError( "pileup of samfiles not implemented yet" )
+ raise NotImplementedError(
+ "pileup of samfiles not implemented yet")
- @cython.boundscheck(False) # we do manual bounds checking
- def count_coverage(self, chr, start, stop, quality_threshold = 15,
- read_callback = 'all'):
- """Count ACGT in a part of a AlignmentFile.
- Return 4 array.arrays of length = stop - start,
- in order A C G T.
+ def count(self,
+ reference=None,
+ start=None,
+ end=None,
+ region=None,
+ until_eof=False):
+ '''
+ count the number of reads in :term:`region`
+
+ The region is specified by :term:`reference`, `start` and
+ `end`. Alternatively, a :term:`samtools` :term:`region` string
+ can be supplied.
+
+ Note that a :term:`SAM` file does not allow random access and if
+ `region` or `reference` are given, an exception is raised.
+
+ Parameters
+ ----------
- @quality_threshold is the minimum quality score (in phred) a
- base has to reach to be counted. Possible @read_callback
- values are
+ reference : string
+ reference_name of the genomic region (chromosome)
- ``all``
-` skip reads in which any of the following
- flags are set: BAM_FUNMAP, BAM_FSECONDARY, BAM_FQCFAIL,
- BAM_FDUP
+ start : int
+ start of the genomic region
- ``nofilter``
- uses every single read
+ end : int
+ end of the genomic region
- Alternatively, @read_callback can be a function ```check_read(read)``1
- that should return True only for those reads that shall be included in
- the counting.
+ until_eof : bool
+ count until the end of the file, possibly including
+ unmapped reads as well.
- """
-
- cdef int _start = start
- cdef int _stop = stop
- cdef int length = _stop - _start
- cdef array.array int_array_template = array.array('L', [])
- cdef array.array count_a
- cdef array.array count_c
- cdef array.array count_g
- cdef array.array count_t
- count_a = array.clone(int_array_template, length, zero=True)
- count_c = array.clone(int_array_template, length, zero=True)
- count_g = array.clone(int_array_template, length, zero=True)
- count_t = array.clone(int_array_template, length, zero=True)
-
- cdef char * seq
- cdef array.array quality
- cdef int qpos
- cdef int refpos
- cdef int c = 0
- cdef int _threshold = quality_threshold
- for read in self.fetch(chr, start, stop):
- if read_callback == 'all':
- if (read.flag & (0x4 | 0x100 | 0x200 | 0x400)):
- continue
- elif read_callback == 'nofilter':
- pass
- else:
- if not read_callback(read):
- continue
- seq = read.seq
+ Raises
+ ------
+
+ ValueError
+ if the genomic coordinates are out of range or invalid.
+
+ '''
+ cdef AlignedSegment read
+ cdef long counter = 0
+
+ if not self.is_open():
+ raise ValueError( "I/O operation on closed file" )
+
+ for read in self.fetch(reference=reference,
+ start=start,
+ end=end,
+ region=region,
+ until_eof=until_eof):
+ counter += 1
+
+ return counter
+
+ @cython.boundscheck(False) # we do manual bounds checking
+ def count_coverage(self,
+ reference=None,
+ start=None,
+ end=None,
+ region=None,
+ quality_threshold=15,
+ read_callback='all'):
+ """count the coverage of genomic positions by reads in :term:`region`.
+
+ The region is specified by :term:`reference`, `start` and
+ `end`. Alternatively, a :term:`samtools` :term:`region` string
+ can be supplied. The coverage is computed per-base [ACGT].
+
+ Parameters
+ ----------
+
+ reference : string
+ reference_name of the genomic region (chromosome)
+
+ start : int
+ start of the genomic region
+
+ end : int
+ end of the genomic region
+
+ region : string
+ a region string.
+
+ quality_threshold : int
+ quality_threshold is the minimum quality score (in phred) a
+ base has to reach to be counted.
+
+ read_callback : string or function
+
+ select a call-back to ignore reads when counting. It can
+ be either a string with the following values:
+
+ ``all``
+ skip reads in which any of the following
+ flags are set: BAM_FUNMAP, BAM_FSECONDARY, BAM_FQCFAIL,
+ BAM_FDUP
+
+ ``nofilter``
+ uses every single read
+
+ Alternatively, `read_callback` can be a function
+ ``check_read(read)`` that should return True only for
+ those reads that shall be included in the counting.
+
+ Raises
+ ------
+
+ ValueError
+ if the genomic coordinates are out of range or invalid.
+
+ Returns
+ -------
+
+ four array.arrays of the same length in order A C G T : tuple
+
+ """
+
+ cdef int _start = start
+ cdef int _stop = end
+ cdef int length = _stop - _start
+ cdef c_array.array int_array_template = array.array('L', [])
+ cdef c_array.array count_a
+ cdef c_array.array count_c
+ cdef c_array.array count_g
+ cdef c_array.array count_t
+ count_a = c_array.clone(int_array_template, length, zero=True)
+ count_c = c_array.clone(int_array_template, length, zero=True)
+ count_g = c_array.clone(int_array_template, length, zero=True)
+ count_t = c_array.clone(int_array_template, length, zero=True)
+
+ cdef AlignedSegment read
+ cdef cython.str seq
+ cdef c_array.array quality
+ cdef int qpos
+ cdef int refpos
+ cdef int c = 0
+ cdef int filter_method = 0
+ if read_callback == "all":
+ filter_method = 1
+ elif read_callback == "nofilter":
+ filter_method = 2
+
+ cdef int _threshold = quality_threshold
+ for read in self.fetch(reference=reference,
+ start=start,
+ end=end,
+ region=region):
+ # apply filter
+ if filter_method == 1:
+ # filter = "all"
+ if (read.flag & (0x4 | 0x100 | 0x200 | 0x400)):
+ continue
+ elif filter_method == 2:
+ # filter = "nofilter"
+ pass
+ else:
+ if not read_callback(read):
+ continue
+
+ # count
+ seq = read.seq
quality = read.query_qualities
for qpos, refpos in read.get_aligned_pairs(True):
- if qpos is not None and refpos is not None and _start <= refpos < _stop:
- if quality[qpos] > quality_threshold:
+ if qpos is not None and refpos is not None and \
+ _start <= refpos < _stop:
+ if quality[qpos] >= quality_threshold:
if seq[qpos] == 'A':
count_a.data.as_ulongs[refpos - _start] += 1
if seq[qpos] == 'C':
@@ -1030,6 +1252,7 @@ cdef class AlignmentFile:
count_g.data.as_ulongs[refpos - _start] += 1
if seq[qpos] == 'T':
count_t.data.as_ulongs[refpos - _start] += 1
+
return count_a, count_c, count_g, count_t
def close(self):
@@ -1049,8 +1272,14 @@ cdef class AlignmentFile:
# close within __dealloc__ (see BCFFile.__dealloc__). Not a pretty
# solution and perhaps unnecessary given that calling self.close has
# been working for years.
+ # AH: I have removed the call to close. Even though it is working,
+ # it seems to be dangerous according to the documentation as the
+ # object may be partially deconstructed already.
+ if self.htsfile != NULL:
+ hts_close(self.htsfile)
+ hts_idx_destroy(self.index);
+ self.htsfile = NULL
- self.close()
bam_destroy1(self.b)
if self.header != NULL:
bam_hdr_destroy(self.header)
@@ -1059,14 +1288,26 @@ cdef class AlignmentFile:
'''
write a single :class:`pysam.AlignedSegment` to disk.
- returns the number of bytes written.
+ Raises
+ ------
+ ValueError
+ if the writing failed
+
+ Returns
+ -------
+
+ int : the number of bytes written. If the file is closed,
+ this will be 0.
'''
- if not self._isOpen():
+ if not self.is_open():
return 0
- cdef int ret = sam_write1(self.htsfile,
- self.header,
- read._delegate)
+ cdef int ret
+
+ with nogil:
+ ret = sam_write1(self.htsfile,
+ self.header,
+ read._delegate)
# kbj: Still need to raise an exception with except -1. Otherwise
# when ret == -1 we get a "SystemError: error return without
@@ -1076,6 +1317,7 @@ cdef class AlignmentFile:
return ret
+ # context manager interface
def __enter__(self):
return self
@@ -1088,34 +1330,44 @@ cdef class AlignmentFile:
###############################################################
## properties
###############################################################
+ property closed:
+ """bool indicating the current state of the file object.
+ This is a read-only attribute; the close() method changes the value.
+ """
+ def __get__(self):
+ return not self.is_open()
+
property filename:
- '''filename associated with this object.'''
+ """filename associated with this object. This is a read-only attribute."""
def __get__(self):
return self._filename
property nreferences:
- '''number of :term:`reference` sequences in the file.'''
+ """int with the number of :term:`reference` sequences in the file.
+ This is a read-only attribute."""
def __get__(self):
- if not self._isOpen(): raise ValueError( "I/O operation on closed file" )
+ if not self.is_open():
+ raise ValueError("I/O operation on closed file")
return self.header.n_targets
property references:
- """tuple with the names of :term:`reference` sequences."""
+ """tuple with the names of :term:`reference` sequences. This is a
+ read-only attribute"""
def __get__(self):
- if not self._isOpen(): raise ValueError( "I/O operation on closed file" )
+ if not self.is_open(): raise ValueError( "I/O operation on closed file" )
t = []
for x from 0 <= x < self.header.n_targets:
- t.append(_charptr_to_str(self.header.target_name[x]))
+ t.append(charptr_to_str(self.header.target_name[x]))
return tuple(t)
property lengths:
- """tuple of the lengths of the :term:`reference` sequences. The
- lengths are in the same order as
+ """tuple of the lengths of the :term:`reference` sequences. This is a
+ read-only attribute. The lengths are in the same order as
:attr:`pysam.AlignmentFile.references`
"""
def __get__(self):
- if not self._isOpen():
+ if not self.is_open():
raise ValueError("I/O operation on closed file")
t = []
for x from 0 <= x < self.header.n_targets:
@@ -1123,70 +1375,66 @@ cdef class AlignmentFile:
return tuple(t)
property mapped:
- """total number of mapped alignments according
- to the statistics recorded in the index.
+ """int with total number of mapped alignments according to the
+ statistics recorded in the index. This is a read-only
+ attribute.
"""
def __get__(self):
- self._checkIndex()
+ self.check_index()
cdef int tid
cdef uint64_t total = 0
cdef uint64_t mapped, unmapped
for tid from 0 <= tid < self.header.n_targets:
- hts_idx_get_stat(self.index, tid, &mapped, &unmapped)
+ with nogil:
+ hts_idx_get_stat(self.index, tid, &mapped, &unmapped)
total += mapped
return total
- def _checkIndex(self):
- '''check if index is present. Otherwise raise
- an error.'''
- if not self._isOpen():
- raise ValueError("I/O operation on closed file")
- if not self.is_bam and not self.is_cram:
- raise AttributeError(
- "AlignmentFile.mapped only available in bam files")
- if self.index == NULL:
- raise ValueError(
- "mapping information not recorded in index "
- "or index not available")
-
-
property unmapped:
- """total number of unmapped reads according
- to the statistics recorded in the index.
+ """int with total number of unmapped reads according to the statistics
+ recorded in the index. This number of reads includes the number of reads
+ without coordinates. This is a read-only attribute.
"""
def __get__(self):
- self._checkIndex()
+ self.check_index()
cdef int tid
- cdef uint64_t total = 0
+ cdef uint64_t total = hts_idx_get_n_no_coor(self.index)
cdef uint64_t mapped, unmapped
for tid from 0 <= tid < self.header.n_targets:
- hts_idx_get_stat(self.index, tid, &mapped, &unmapped)
+ with nogil:
+ hts_idx_get_stat(self.index, tid, &mapped, &unmapped)
total += unmapped
return total
property nocoordinate:
- """total number of reads without coordinates according
- to the statistics recorded in the index.
+ """int with total number of reads without coordinates according to the
+ statistics recorded in the index. This is a read-only attribute.
"""
def __get__(self):
- self._checkIndex()
- return hts_idx_get_n_no_coor(self.index)
+ self.check_index()
+ cdef uint64_t n
+ with nogil:
+ n = hts_idx_get_n_no_coor(self.index)
+ return n
property text:
- '''full contents of the :term:`sam file` header as a string
-
+ '''string with the full contents of the :term:`sam file` header as a
+ string.
+
+ This is a read-only attribute.
+
See :attr:`pysam.AlignmentFile.header` to get a parsed
representation of the header.
-
'''
def __get__(self):
- if not self._isOpen():
+ if not self.is_open():
raise ValueError( "I/O operation on closed file" )
return from_string_and_size(self.header.text, self.header.l_text)
property header:
- '''header information within the :term:`sam file`. The records and
- fields are returned as a two-level dictionary.
+ """two-level dictionary with header information from the file.
+
+ This is a read-only attribute.
The first level contains the record (``HD``, ``SQ``, etc) and
the second level contains the fields (``VN``, ``LN``, etc).
@@ -1204,9 +1452,9 @@ cdef class AlignmentFile:
options that contain characters that are not valid field
separators.
- '''
+ """
def __get__(self):
- if not self._isOpen():
+ if not self.is_open():
raise ValueError( "I/O operation on closed file" )
result = {}
@@ -1245,19 +1493,11 @@ cdef class AlignmentFile:
# header. Thus, in contravention to the
# SAM API, consume the rest of the line.
key, value = "\t".join(fields[idx+1:]).split(":", 1)
- x[key] = VALID_HEADER_FIELDS[record][key](value)
+ x[key] = KNOWN_HEADER_FIELDS[record][key](value)
break
- # uppercase keys must be valid
- if key in VALID_HEADER_FIELDS[record]:
- x[key] = VALID_HEADER_FIELDS[record][key](value)
- # lowercase are permitted for user fields
- elif not key.isupper():
- x[key] = value
- else:
- raise ValueError(
- "unknown field code '%s' in record '%s'" %
- (key, record))
+ # interpret type of known header record tags, default to str
+ x[key] = KNOWN_HEADER_FIELDS[record].get(key, str)(value)
if VALID_HEADER_TYPES[record] == dict:
if record in result:
@@ -1269,8 +1509,9 @@ cdef class AlignmentFile:
if record not in result: result[record] = []
result[record].append(x)
- # if there are no SQ lines in the header, add the reference names
- # from the information in the bam file.
+ # if there are no SQ lines in the header, add the
+ # reference names from the information in the bam
+ # file.
#
# Background: c-samtools keeps the textual part of the
# header separate from the list of reference names and
@@ -1285,113 +1526,14 @@ cdef class AlignmentFile:
return result
- def _buildLine(self, fields, record):
- '''build a header line from *fields* dictionary for *record*'''
-
- # TODO: add checking for field and sort order
- line = ["@%s" % record]
- # comment
- if record == "CO":
- line.append(fields)
- # user tags
- elif record.islower():
- for key in sorted(fields):
- line.append("%s:%s" % (key, str(fields[key])))
- # defined tags
- else:
- # write fields of the specification
- for key in VALID_HEADER_ORDER[record]:
- if key in fields:
- line.append("%s:%s" % (key, str(fields[key])))
- # write user fields
- for key in fields:
- if not key.isupper():
- line.append("%s:%s" % (key, str(fields[key])))
-
- return "\t".join(line)
-
- cdef bam_hdr_t * _buildHeader(self, new_header):
- '''return a new header built from a dictionary in *new_header*.
-
- This method inserts the text field, target_name and target_len.
- '''
-
- lines = []
-
- # check if hash exists
-
- # create new header and copy old data
- cdef bam_hdr_t * dest
-
- dest = bam_hdr_init()
-
- # first: defined tags
- for record in VALID_HEADERS:
- if record in new_header:
- ttype = VALID_HEADER_TYPES[record]
- data = new_header[record]
- if type(data) != type(ttype()):
- raise ValueError(
- "invalid type for record %s: %s, expected %s" %
- (record, type(data), type(ttype())))
- if type(data) is dict:
- lines.append(self._buildLine(data, record))
- else:
- for fields in new_header[record]:
- lines.append(self._buildLine(fields, record))
-
- # then: user tags (lower case), sorted alphabetically
- for record, data in sorted(new_header.items()):
- if record in VALID_HEADERS: continue
- if type( data ) is dict:
- lines.append( self._buildLine( data, record ) )
- else:
- for fields in new_header[record]:
- lines.append( self._buildLine( fields, record ) )
-
- text = "\n".join(lines) + "\n"
- if dest.text != NULL: free( dest.text )
- dest.text = <char*>calloc( len(text), sizeof(char))
- dest.l_text = len(text)
- cdef bytes btext = text.encode('ascii')
- strncpy( dest.text, btext, dest.l_text )
-
- cdef bytes bseqname
- # collect targets
- if "SQ" in new_header:
- seqs = []
- for fields in new_header["SQ"]:
- try:
- seqs.append( (fields["SN"], fields["LN"] ) )
- except KeyError:
- raise KeyError( "incomplete sequence information in '%s'" % str(fields))
-
- dest.n_targets = len(seqs)
- dest.target_name = <char**>calloc(dest.n_targets, sizeof(char*))
- dest.target_len = <uint32_t*>calloc(dest.n_targets, sizeof(uint32_t))
-
- for x from 0 <= x < dest.n_targets:
- seqname, seqlen = seqs[x]
- dest.target_name[x] = <char*>calloc(
- len(seqname) + 1, sizeof(char))
- bseqname = seqname.encode('ascii')
- strncpy(dest.target_name[x], bseqname,
- len(seqname) + 1)
- dest.target_len[x] = seqlen
-
- return dest
-
- ###############################################################
- ###############################################################
###############################################################
## file-object like iterator access
## note: concurrent access will cause errors (see IteratorRow
## and multiple_iterators)
## Possible solutions: deprecate or open new file handle
- ###############################################################
def __iter__(self):
- if not self._isOpen():
- raise ValueError( "I/O operation on closed file" )
+ if not self.is_open():
+ raise ValueError("I/O operation on closed file")
if not self.is_bam and self.header.n_targets == 0:
raise NotImplementedError(
@@ -1405,21 +1547,28 @@ cdef class AlignmentFile:
'''
cversion of iterator. Used by :class:`pysam.AlignmentFile.IteratorColumn`.
'''
- return sam_read1(self.htsfile,
- self.header,
- self.b)
+ cdef int ret
+ with nogil:
+ ret = sam_read1(self.htsfile,
+ self.header,
+ self.b)
+ return ret
def __next__(self):
- """
- python version of next().
- """
cdef int ret = self.cnext()
if (ret >= 0):
- return makeAlignedSegment(self.b)
+ return makeAlignedSegment(self.b, self)
elif ret == -2:
raise IOError('truncated file')
else:
raise StopIteration
+
+ # Compatibility functions for pysam < 0.8.3
+ def gettid(self, reference):
+ return self.get_tid(reference)
+
+ def getrname(self, tid):
+ return self.get_reference_name(tid)
cdef class IteratorRow:
@@ -1442,14 +1591,15 @@ cdef class IteratorRow:
.. note::
It is usually not necessary to create an object of this class
- explicitely. It is returned as a result of call to a
+ explicitly. It is returned as a result of call to a
:meth:`AlignmentFile.fetch`.
'''
def __init__(self, AlignmentFile samfile, int multiple_iterators=False):
+ cdef char *cfilename
- if not samfile._isOpen():
+ if not samfile.is_open():
raise ValueError("I/O operation on closed file")
# makes sure that samfile stays alive as long as the
@@ -1459,11 +1609,14 @@ cdef class IteratorRow:
# reopen the file - note that this makes the iterator
# slow and causes pileup to slow down significantly.
if multiple_iterators:
- self.htsfile = hts_open(samfile._filename, 'r')
+ cfilename = samfile._filename
+ with nogil:
+ self.htsfile = hts_open(cfilename, 'r')
assert self.htsfile != NULL
# read header - required for accurate positioning
# could a tell/seek work?
- self.header = sam_hdr_read(self.htsfile)
+ with nogil:
+ self.header = sam_hdr_read(self.htsfile)
assert self.header != NULL
self.owns_samfile = True
else:
@@ -1491,7 +1644,7 @@ cdef class IteratorRowRegion(IteratorRow):
.. note::
It is usually not necessary to create an object of this class
- explicitely. It is returned as a result of call to a
+ explicitly. It is returned as a result of call to a
:meth:`AlignmentFile.fetch`.
"""
@@ -1503,14 +1656,15 @@ cdef class IteratorRowRegion(IteratorRow):
IteratorRow.__init__(self, samfile,
multiple_iterators=multiple_iterators)
- if not samfile._hasIndex():
+ if not samfile.has_index():
raise ValueError("no index available for iteration")
- self.iter = sam_itr_queryi(
- self.samfile.index,
- tid,
- beg,
- end)
+ with nogil:
+ self.iter = sam_itr_queryi(
+ self.samfile.index,
+ tid,
+ beg,
+ end)
def __iter__(self):
return self
@@ -1520,17 +1674,16 @@ cdef class IteratorRowRegion(IteratorRow):
cdef int cnext(self):
'''cversion of iterator. Used by IteratorColumn'''
- self.retval = hts_itr_next(hts_get_bgzfp(self.htsfile),
- self.iter,
- self.b,
- self.htsfile)
+ with nogil:
+ self.retval = hts_itr_next(hts_get_bgzfp(self.htsfile),
+ self.iter,
+ self.b,
+ self.htsfile)
def __next__(self):
- """python version of next().
- """
self.cnext()
if self.retval >= 0:
- return makeAlignedSegment(self.b)
+ return makeAlignedSegment(self.b, self.samfile)
elif self.retval == -2:
# Note: it is currently not the case that hts_iter_next
# returns -2 for a truncated file.
@@ -1546,7 +1699,7 @@ cdef class IteratorRowRegion(IteratorRow):
cdef class IteratorRowHead(IteratorRow):
"""*(AlignmentFile samfile, n, int multiple_iterators=False)*
- iterate over first n reads in *samfile*
+ iterate over first n reads in `samfile`
.. note::
It is usually not necessary to create an object of this class
@@ -1572,23 +1725,22 @@ cdef class IteratorRowHead(IteratorRow):
cdef int cnext(self):
'''cversion of iterator. Used by IteratorColumn'''
- return sam_read1(self.htsfile,
- self.samfile.header,
- self.b)
+ cdef int ret
+ with nogil:
+ ret = sam_read1(self.htsfile,
+ self.samfile.header,
+ self.b)
+ return ret
def __next__(self):
- """python version of next().
-
- pyrex uses this non-standard name instead of next()
- """
if self.current_row >= self.max_rows:
raise StopIteration
cdef int ret = self.cnext()
- if (ret >= 0):
+ if ret >= 0:
self.current_row += 1
- return makeAlignedSegment( self.b )
- elif (ret == -2):
+ return makeAlignedSegment(self.b, self.samfile)
+ elif ret == -2:
raise IOError('truncated file')
else:
raise StopIteration
@@ -1597,7 +1749,7 @@ cdef class IteratorRowHead(IteratorRow):
cdef class IteratorRowAll(IteratorRow):
"""*(AlignmentFile samfile, int multiple_iterators=False)*
- iterate over all reads in *samfile*
+ iterate over all reads in `samfile`
.. note::
@@ -1621,19 +1773,18 @@ cdef class IteratorRowAll(IteratorRow):
cdef int cnext(self):
'''cversion of iterator. Used by IteratorColumn'''
- return sam_read1(self.htsfile,
- self.samfile.header,
- self.b)
+ cdef int ret
+ with nogil:
+ ret = sam_read1(self.htsfile,
+ self.samfile.header,
+ self.b)
+ return ret
def __next__(self):
- """python version of next().
-
- pyrex uses this non-standard name instead of next()
- """
cdef int ret = self.cnext()
- if (ret >= 0):
- return makeAlignedSegment(self.b)
- elif (ret == -2):
+ if ret >= 0:
+ return makeAlignedSegment(self.b, self.samfile)
+ elif ret == -2:
raise IOError('truncated file')
else:
raise StopIteration
@@ -1645,7 +1796,7 @@ cdef class IteratorRowAllRefs(IteratorRow):
.. note::
It is usually not necessary to create an object of this class
- explicitely. It is returned as a result of call to a
+ explicitly. It is returned as a result of call to a
:meth:`AlignmentFile.fetch`.
"""
@@ -1656,7 +1807,7 @@ cdef class IteratorRowAllRefs(IteratorRow):
IteratorRow.__init__(self, samfile,
multiple_iterators=multiple_iterators)
- if not samfile._hasIndex():
+ if not samfile.has_index():
raise ValueError("no index available for fetch")
self.tid = -1
@@ -1681,10 +1832,6 @@ cdef class IteratorRowAllRefs(IteratorRow):
return self
def __next__(self):
- """python version of next().
-
- pyrex uses this non-standard name instead of next()
- """
# Create an initial iterator
if self.tid == -1:
if not self.samfile.nreferences:
@@ -1697,7 +1844,7 @@ cdef class IteratorRowAllRefs(IteratorRow):
# If current iterator is not exhausted, return aligned read
if self.rowiter.retval > 0:
- return makeAlignedSegment(self.rowiter.b)
+ return makeAlignedSegment(self.rowiter.b, self.samfile)
self.tid += 1
@@ -1711,11 +1858,11 @@ cdef class IteratorRowAllRefs(IteratorRow):
cdef class IteratorRowSelection(IteratorRow):
"""*(AlignmentFile samfile)*
- iterate over reads in *samfile* at a given list of file positions.
+ iterate over reads in `samfile` at a given list of file positions.
.. note::
It is usually not necessary to create an object of this class
- explicitely. It is returned as a result of call to a :meth:`AlignmentFile.fetch`.
+ explicitly. It is returned as a result of call to a :meth:`AlignmentFile.fetch`.
"""
def __init__(self, AlignmentFile samfile, positions, int multiple_iterators=True):
@@ -1733,26 +1880,27 @@ cdef class IteratorRowSelection(IteratorRow):
cdef int cnext(self):
'''cversion of iterator'''
-
# end iteration if out of positions
if self.current_pos >= len(self.positions): return -1
- bgzf_seek(hts_get_bgzfp(self.htsfile),
- self.positions[self.current_pos],
- 0)
+ cdef uint64_t pos = self.positions[self.current_pos]
+ with nogil:
+ bgzf_seek(hts_get_bgzfp(self.htsfile),
+ pos,
+ 0)
self.current_pos += 1
- return sam_read1(self.htsfile,
- self.samfile.header,
- self.b)
- def __next__(self):
- """python version of next().
+ cdef int ret
+ with nogil:
+ ret = sam_read1(self.htsfile,
+ self.samfile.header,
+ self.b)
+ return ret
- pyrex uses this non-standard name instead of next()
- """
+ def __next__(self):
cdef int ret = self.cnext()
if (ret >= 0):
- return makeAlignedSegment(self.b)
+ return makeAlignedSegment(self.b, self.samfile)
elif (ret == -2):
raise IOError('truncated file')
else:
@@ -1764,7 +1912,10 @@ cdef int __advance_nofilter(void *data, bam1_t *b):
'''
cdef __iterdata * d
d = <__iterdata*>data
- return sam_itr_next(d.htsfile, d.iter, b)
+ cdef int ret
+ with nogil:
+ ret = sam_itr_next(d.htsfile, d.iter, b)
+ return ret
cdef int __advance_all(void *data, bam1_t *b):
@@ -1777,10 +1928,12 @@ cdef int __advance_all(void *data, bam1_t *b):
cdef __iterdata * d
cdef mask = BAM_FUNMAP | BAM_FSECONDARY | BAM_FQCFAIL | BAM_FDUP
d = <__iterdata*>data
- cdef int ret = sam_itr_next(d.htsfile, d.iter, b)
- while ret >= 0 and b.core.flag & mask:
+ cdef int ret
+ with nogil:
ret = sam_itr_next(d.htsfile, d.iter, b)
-
+ while ret >= 0 and b.core.flag & mask:
+ with nogil:
+ ret = sam_itr_next(d.htsfile, d.iter, b)
return ret
@@ -1798,13 +1951,16 @@ cdef int __advance_snpcalls(void * data, bam1_t * b):
cdef __iterdata * d
d = <__iterdata*>data
- cdef int ret = sam_itr_next(d.htsfile, d.iter, b)
+ cdef int ret
cdef int skip = 0
cdef int q
cdef int is_cns = 1
cdef int is_nobaq = 0
cdef int capQ_thres = 0
+ with nogil:
+ ret = sam_itr_next(d.htsfile, d.iter, b)
+
# reload sequence
if d.fastafile != NULL and b.core.tid != d.tid:
if d.seq != NULL:
@@ -1813,7 +1969,7 @@ cdef int __advance_snpcalls(void * data, bam1_t * b):
d.seq = faidx_fetch_seq(
d.fastafile,
d.header.target_name[d.tid],
- 0, max_pos,
+ 0, MAX_POS,
&d.seq_len)
if d.seq == NULL:
@@ -1844,7 +2000,8 @@ cdef int __advance_snpcalls(void * data, bam1_t * b):
break
# additional filters
- ret = sam_itr_next(d.htsfile, d.iter, b)
+ with nogil:
+ ret = sam_itr_next(d.htsfile, d.iter, b)
return ret
@@ -1862,7 +2019,7 @@ cdef class IteratorColumn:
result = list( f.pileup() )
Here, ``result`` will contain ``n`` objects of type
- :class:`PileupColumn` for ``n`` columns, but each object in
+ :class:`~pysam.PileupColumn` for ``n`` columns, but each object in
``result`` will contain the same information.
The desired behaviour can be achieved by list comprehension::
@@ -1870,9 +2027,9 @@ cdef class IteratorColumn:
result = [ x.pileups() for x in f.pileup() ]
``result`` will be a list of ``n`` lists of objects of type
- :class:`PileupRead`.
+ :class:`~pysam.PileupRead`.
- If the iterator is associated with a :class:`Fastafile` using the
+ If the iterator is associated with a :class:`~pysam.Fastafile` using the
:meth:`addReference` method, then the iterator will export the
current sequence via the methods :meth:`getSequence` and
:meth:`seq_len`.
@@ -1887,7 +2044,7 @@ cdef class IteratorColumn:
See AlignmentFile.pileup for description.
fastafile
- A :class:`FastaFile` object
+ A :class:`~pysam.FastaFile` object
max_depth
maximum read depth. The default is 8000.
@@ -1896,8 +2053,6 @@ cdef class IteratorColumn:
def __cinit__( self, AlignmentFile samfile, **kwargs ):
self.samfile = samfile
- # TODO
- # self.mask = kwargs.get("mask", BAM_DEF_MASK )
self.fastafile = kwargs.get("fastafile", None)
self.stepper = kwargs.get("stepper", None)
self.max_depth = kwargs.get("max_depth", 8000)
@@ -1914,12 +2069,12 @@ cdef class IteratorColumn:
cdef int cnext(self):
'''perform next iteration.
'''
- self.plp = bam_plp_auto( self.pileup_iter,
- &self.tid,
- &self.pos,
- &self.n_plp )
+ self.plp = bam_plp_auto(self.pileup_iter,
+ &self.tid,
+ &self.pos,
+ &self.n_plp )
- cdef char * getSequence( self ):
+ cdef char * getSequence(self):
'''return current reference sequence underlying the iterator.
'''
return self.iterdata.seq
@@ -1930,7 +2085,7 @@ cdef class IteratorColumn:
def addReference(self, Fastafile fastafile):
'''
- add reference sequences in *fastafile* to iterator.'''
+ add reference sequences in `fastafile` to iterator.'''
self.fastafile = fastafile
if self.iterdata.seq != NULL: free(self.iterdata.seq)
self.iterdata.tid = -1
@@ -1944,7 +2099,7 @@ cdef class IteratorColumn:
cdef setMask(self, mask):
'''set masking flag in iterator.
- reads with bits set in *mask* will be skipped.
+ reads with bits set in `mask` will be skipped.
'''
raise NotImplementedError()
# self.mask = mask
@@ -1954,7 +2109,7 @@ cdef class IteratorColumn:
int tid,
int start,
int end,
- int multiple_iterators = 0 ):
+ int multiple_iterators=0 ):
'''setup the iterator structure'''
self.iter = IteratorRowRegion(self.samfile, tid, start, end, multiple_iterators)
@@ -2040,7 +2195,7 @@ cdef class IteratorColumnRegion(IteratorColumn):
def __cinit__(self, AlignmentFile samfile,
int tid = 0,
int start = 0,
- int end = max_pos,
+ int end = MAX_POS,
int truncate = False,
**kwargs ):
@@ -2051,8 +2206,6 @@ cdef class IteratorColumnRegion(IteratorColumn):
self.truncate = truncate
def __next__(self):
- """python version of next().
- """
while 1:
self.cnext()
@@ -2069,7 +2222,8 @@ cdef class IteratorColumnRegion(IteratorColumn):
return makePileupColumn(&self.plp,
self.tid,
self.pos,
- self.n_plp)
+ self.n_plp,
+ self.samfile)
cdef class IteratorColumnAllRefs(IteratorColumn):
@@ -2085,11 +2239,9 @@ cdef class IteratorColumnAllRefs(IteratorColumn):
raise StopIteration
# initialize iterator
- self.setupIteratorData(self.tid, 0, max_pos, 1)
+ self.setupIteratorData(self.tid, 0, MAX_POS, 1)
def __next__(self):
- """python version of next().
- """
while 1:
self.cnext()
@@ -2102,1852 +2254,166 @@ cdef class IteratorColumnAllRefs(IteratorColumn):
return makePileupColumn(&self.plp,
self.tid,
self.pos,
- self.n_plp)
+ self.n_plp,
+ self.samfile)
# otherwise, proceed to next reference or stop
self.tid += 1
if self.tid < self.samfile.nreferences:
- self.setupIteratorData(self.tid, 0, max_pos, 0)
+ self.setupIteratorData(self.tid, 0, MAX_POS, 0)
else:
raise StopIteration
-cdef inline int32_t _getQueryStart(bam1_t *src) except -1:
- cdef uint32_t * cigar_p
- cdef uint32_t k, op
- cdef uint32_t start_offset = 0
-
- if pysam_get_n_cigar(src):
- cigar_p = pysam_bam_get_cigar(src);
- for k from 0 <= k < pysam_get_n_cigar(src):
- op = cigar_p[k] & BAM_CIGAR_MASK
- if op == BAM_CHARD_CLIP:
- if start_offset != 0 and start_offset != src.core.l_qseq:
- PyErr_SetString(ValueError, 'Invalid clipping in CIGAR string')
- return -1
- elif op == BAM_CSOFT_CLIP:
- start_offset += cigar_p[k] >> BAM_CIGAR_SHIFT
- else:
- break
-
- return start_offset
-
-
-cdef inline int32_t _getQueryEnd(bam1_t *src) except -1:
- cdef uint32_t * cigar_p
- cdef uint32_t k, op
- cdef uint32_t end_offset = src.core.l_qseq
-
- if pysam_get_n_cigar(src) > 1:
- cigar_p = pysam_bam_get_cigar(src);
- for k from pysam_get_n_cigar(src) > k >= 1:
- op = cigar_p[k] & BAM_CIGAR_MASK
- if op == BAM_CHARD_CLIP:
- if end_offset != 0 and end_offset != src.core.l_qseq:
- PyErr_SetString(ValueError,
- 'Invalid clipping in CIGAR string')
- return -1
- elif op == BAM_CSOFT_CLIP:
- end_offset -= cigar_p[k] >> BAM_CIGAR_SHIFT
- else:
- break
- if end_offset == 0:
- end_offset = src.core.l_qseq
- return end_offset
+cdef class SNPCall:
+ '''the results of a SNP call.'''
+ cdef int _tid
+ cdef int _pos
+ cdef char _reference_base
+ cdef char _genotype
+ cdef int _consensus_quality
+ cdef int _snp_quality
+ cdef int _rms_mapping_quality
+ cdef int _coverage
+ property tid:
+ '''the chromosome ID as is defined in the header'''
+ def __get__(self):
+ return self._tid
-cdef inline object _getSequenceRange(bam1_t *src,
- uint32_t start, uint32_t end):
- cdef uint8_t * p
- cdef uint32_t k
- cdef char * s
+ property pos:
+ '''nucleotide position of SNP.'''
+ def __get__(self): return self._pos
- if not src.core.l_qseq:
- return None
+ property reference_base:
+ '''reference base at pos. ``N`` if no reference sequence supplied.'''
+ def __get__(self): return from_string_and_size( &self._reference_base, 1 )
- seq = PyBytes_FromStringAndSize(NULL, end - start)
- s = <char*>seq
- p = pysam_bam_get_seq(src)
+ property genotype:
+ '''the genotype called.'''
+ def __get__(self): return from_string_and_size( &self._genotype, 1 )
- for k from start <= k < end:
- # equivalent to seq_nt16_str[bam1_seqi(s, i)] (see bam.c)
- # note: do not use string literal as it will be a python string
- s[k-start] = seq_nt16_str[p[k/2] >> 4 * (1 - k%2) & 0xf]
+ property consensus_quality:
+ '''the genotype quality (Phred-scaled).'''
+ def __get__(self): return self._consensus_quality
- return _charptr_to_str(seq)
+ property snp_quality:
+ '''the snp quality (Phred scaled) - probability of consensus being
+ identical to reference sequence.'''
+ def __get__(self): return self._snp_quality
+ property mapping_quality:
+ '''the root mean square (rms) of the mapping quality of all reads
+ involved in the call.'''
+ def __get__(self): return self._rms_mapping_quality
-cdef inline object _getQualitiesRange(bam1_t *src,
- uint32_t start,
- uint32_t end):
- '''return an array of quality values.'''
+ property coverage:
+ '''coverage or read depth - the number of reads involved in the call.'''
+ def __get__(self): return self._coverage
- cdef uint8_t * p
- cdef uint32_t k
+ def __str__(self):
- p = pysam_bam_get_qual(src)
- if p[0] == 0xff:
- return None
+ return "\t".join( map(str, (
+ self.tid,
+ self.pos,
+ self.reference_base,
+ self.genotype,
+ self.consensus_quality,
+ self.snp_quality,
+ self.mapping_quality,
+ self.coverage ) ) )
- # 'B': unsigned char
- cdef array.array result = array.array('B', [0])
- array.resize(result, end - start)
- # copy data
- memcpy(result.data.as_voidptr, <void*>&p[start], end - start)
+cdef class IndexedReads:
+ """*(AlignmentFile samfile, multiple_iterators=True)
- return result
+ Index a Sam/BAM-file by query name while keeping the
+ original sort order intact.
+ The index is kept in memory and can be substantial.
-def toQualityString(qualities):
- '''convert a list of quality score to the string
- representation used in the SAM format.'''
- if qualities is None:
- return None
- return "".join([chr(x+33) for x in qualities])
-
+ By default, the file is re-opened to avoid conflicts if multiple
+ operators work on the same file. Set `multiple_iterators` = False
+ to not re-open `samfile`.
-def fromQualityString(quality_string):
- '''return a list of quality scores from the
- stringn representation of quality scores used
- in the SAM format.'''
- if quality_string is None:
- return None
- return array.array('B', [ord(x)-33 for x in quality_string])
-
-
-cdef inline uint8_t _get_value_code(value, value_type=None):
- '''guess type code for a *value*. If *value_type* is None,
- the type code will be inferred based on the Python type of
- *value*'''
- cdef uint8_t type_code
- cdef char * _char_type
-
- if value_type is None:
- if isinstance(value, int):
- type_code = 'i'
- elif isinstance(value, float):
- type_code = 'd'
- elif isinstance(value, str):
- type_code = 'Z'
- elif isinstance(value, bytes):
- type_code = 'Z'
- else:
- return 0
- else:
- if value_type not in 'Zidf':
- return 0
- value_type = _forceBytes(value_type)
- _char_type = value_type
- type_code = (<uint8_t*>_char_type)[0]
+ Parameters
+ ----------
- return type_code
+ samfile : AlignmentFile
+ File to be indexed.
+ multiple_iterators : bool
+ Flag indicating whether the file should be reopened. Reopening prevents
+ existing iterators being affected by the indexing.
-cdef inline _get_value_type(value, maximum_value=None):
- '''returns the value type of a value.
+ """
- If max is specified, the approprite type is
- returned for a range where value is the minimum.
- '''
-
- if maximum_value is None:
- maximum_value = value
-
- t = type(value)
-
- if t is float:
- valuetype = b'f'
- elif t is int:
- # signed ints
- if value < 0:
- if value >= -128 and maximum_value < 128:
- valuetype = b'c'
- elif value >= -32768 and maximum_value < 32768:
- valuetype = b's'
- elif value < -2147483648 or maximum_value >= 2147483648:
- raise ValueError(
- "at least one signed integer out of range of "
- "BAM/SAM specification")
- else:
- valuetype = b'i'
- # unsigned ints
- else:
- if maximum_value < 256:
- valuetype = b'C'
- elif maximum_value < 65536:
- valuetype = b'S'
- elif maximum_value >= 4294967296:
- raise ValueError(
- "at least one integer out of range of BAM/SAM specification")
- else:
- valuetype = b'I'
- else:
- # Note: hex strings (H) are not supported yet
- if t is not bytes:
- value = value.encode('ascii')
- if len(value) == 1:
- valuetype = b"A"
- else:
- valuetype = b'Z'
+ def __init__(self, AlignmentFile samfile, int multiple_iterators=True):
+ cdef char *cfilename
- return valuetype
+ # makes sure that samfile stays alive as long as this
+ # object is alive.
+ self.samfile = samfile
+ assert samfile.is_bam, "can only IndexReads on bam files"
-cdef inline _pack_tags(tags):
- """pack a list of tags. Each tag is a tuple of (tag, tuple).
-
- Values are packed into the most space efficient data structure
- possible unless the tag contains a third field with the type code.
+ # re-open the file if multiple_iterators is set - note that this
+ # makes the iterator slow and causes pileup to slow down significantly.
+ if multiple_iterators:
+ cfilename = samfile._filename
+ with nogil:
+ self.htsfile = hts_open(cfilename, 'r')
+ assert self.htsfile != NULL
+ # read header - required for accurate positioning
+ with nogil:
+ self.header = sam_hdr_read(self.htsfile)
+ self.owns_samfile = True
+ else:
+ self.htsfile = self.samfile.htsfile
+ self.header = self.samfile.header
+ self.owns_samfile = False
- Returns a fmt string and the associated list of arguments
- to used in a call to struct.pack_into.
- """
- fmts, args = ["<"], []
+ def build(self):
+ '''build the index.'''
- for tag in tags:
+ self.index = collections.defaultdict(list)
- if len(tag) == 2:
- pytag, value = tag
- valuetype = None
- elif len(tag) == 3:
- pytag, value, valuetype = tag
- else:
- raise ValueError("malformatted tag: %s" % str(tag))
-
- if not type(pytag) is bytes:
- pytag = pytag.encode('ascii')
-
- datatype2format = {'c': 'b',
- 's': 'h',
- 'i': 'i',
- 'C': 'B',
- 'S': 'H',
- 'I': 'I',
- 'f': 'f',
- 'A': 'c',}
-
- t = type(value)
- if t is tuple or t is list:
- # binary tags are treated separately
- if valuetype is None:
- # automatically determine value type - first value
- # determines type. If there is a mix of types, the
- # result is undefined.
- valuetype = _get_value_type(min(value), max(value))
-
- if valuetype not in datatype2format:
- raise ValueError("invalid value type '%s'" % valuetype)
- datafmt = "2sccI%i%s" % (len(value), datatype2format[valuetype])
-
- args.extend([pytag[:2],
- b"B",
- valuetype,
- len(value)] + list(value))
- fmts.append(datafmt)
+ # this method will start indexing from the current file
+ # position if you decide
+ cdef int ret = 1
+ cdef bam1_t * b = <bam1_t*>calloc(1, sizeof( bam1_t))
- else:
-
- if valuetype is None:
- valuetype = _get_value_type(value)
-
- if valuetype == b"Z":
- fmt = "2sc%is" % (len(value)+1)
- else:
- fmt = "2sc%s" % datatype2format[valuetype]
-
- args.extend([pytag[:2],
- valuetype,
- value])
-
- fmts.append(fmt)
-
- return "".join(fmts), args
-
-
-cdef class AlignedSegment:
- '''Class representing an aligned segment.
-
- This class stores a handle to the samtools C-structure representing
- an aligned read. Member read access is forwarded to the C-structure
- and converted into python objects. This implementation should be fast,
- as only the data needed is converted.
-
- For write access, the C-structure is updated in-place. This is
- not the most efficient way to build BAM entries, as the variable
- length data is concatenated and thus needs to be resized if
- a field is updated. Furthermore, the BAM entry might be
- in an inconsistent state.
-
- One issue to look out for is that the sequence should always
- be set *before* the quality scores. Setting the sequence will
- also erase any quality scores that were set previously.
- '''
-
- # Now only called when instances are created from Python
- def __init__(self):
- # see bam_init1
- self._delegate = <bam1_t*>calloc(1, sizeof(bam1_t))
- # allocate some memory. If size is 0, calloc does not return a
- # pointer that can be passed to free() so allocate 40 bytes
- # for a new read
- self._delegate.m_data = 40
- self._delegate.data = <uint8_t *>calloc(
- self._delegate.m_data, 1)
- self._delegate.l_data = 0
-
- def __dealloc__(self):
- bam_destroy1(self._delegate)
-
- def __str__(self):
- """return string representation of alignment.
-
- The representation is an approximate :term:`sam` format.
-
- An aligned read might not be associated with a :term:`AlignmentFile`.
- As a result :term:`tid` is shown instead of the reference name.
-
- Similarly, the tags field is returned in its parsed state.
- """
- # sam-parsing is done in sam.c/bam_format1_core which
- # requires a valid header.
- return "\t".join(map(str, (self.query_name,
- self.flag,
- self.reference_id,
- self.reference_start,
- self.mapping_quality,
- self.cigarstring,
- self.next_reference_id,
- self.next_reference_start,
- self.query_alignment_length,
- self.query_sequence,
- self.query_qualities,
- self.tags)))
-
- def compare(self, AlignedSegment other):
- '''return -1,0,1, if contents in this are binary
- <,=,> to *other*
-
- '''
-
- cdef int retval, x
- cdef bam1_t *t
- cdef bam1_t *o
-
- t = self._delegate
- o = other._delegate
-
- # uncomment for debugging purposes
- # cdef unsigned char * oo, * tt
- # tt = <unsigned char*>(&t.core)
- # oo = <unsigned char*>(&o.core)
- # for x from 0 <= x < sizeof( bam1_core_t): print x, tt[x], oo[x]
- # tt = <unsigned char*>(t.data)
- # oo = <unsigned char*>(o.data)
- # for x from 0 <= x < max(t.l_data, o.l_data): print x, tt[x], oo[x], chr(tt[x]), chr(oo[x])
-
- # Fast-path test for object identity
- if t == o:
- return 0
-
- retval = memcmp(&t.core, &o.core, sizeof(bam1_core_t))
-
- if retval:
- return retval
- # cmp(t.l_data, o.l_data)
- retval = (t.l_data > o.l_data) - (t.l_data < o.l_data)
- if retval:
- return retval
- return memcmp(t.data, o.data, t.l_data)
-
- def __richcmp__(self, AlignedSegment other, int op):
- if op == 2: # == operator
- return self.compare(other) == 0
- elif op == 3: # != operator
- return self.compare(other) != 0
- else:
- return NotImplemented
-
- # Disabled so long as __cmp__ is a special method
- def __hash__(self):
- cdef bam1_t * src
- src = self._delegate
- # shift and xor values in the core structure
- # make sure tid and mtid are shifted by different amounts
- # should variable length data be included?
- cdef uint32_t hash_value = src.core.tid << 24 ^ \
- src.core.pos << 16 ^ \
- src.core.qual << 8 ^ \
- src.core.flag ^ \
- src.core.isize << 24 ^ \
- src.core.mtid << 16 ^ \
- src.core.mpos << 8
-
- return hash_value
-
- ########################################################
- ## Basic attributes in order of appearance in SAM format
- property query_name:
- """the query template name (None if not present)"""
- def __get__(self):
- cdef bam1_t * src
- src = self._delegate
- if pysam_get_l_qname(src) == 0:
- return None
- return _charptr_to_str(<char *>pysam_bam_get_qname(src))
-
- def __set__(self, qname):
- if qname is None or len(qname) == 0:
- return
- qname = _forceBytes(qname)
- cdef bam1_t * src
- cdef int l
- cdef char * p
-
- src = self._delegate
- p = pysam_bam_get_qname(src)
-
- # the qname is \0 terminated
- l = len(qname) + 1
- pysam_bam_update(src,
- pysam_get_l_qname(src),
- l,
- <uint8_t*>p)
-
-
- pysam_set_l_qname(src, l)
-
- # re-acquire pointer to location in memory
- # as it might have moved
- p = pysam_bam_get_qname(src)
-
- strncpy(p, qname, l)
-
- property flag:
- """properties flag"""
- def __get__(self):
- return pysam_get_flag(self._delegate)
- def __set__(self, flag):
- pysam_set_flag(self._delegate, flag)
-
- property reference_id:
- """:term:`reference` ID
-
- .. note::
-
- This field contains the index of the reference sequence in
- the sequence dictionary. To obtain the name of the
- reference sequence, use
- :meth:`pysam.AlignmentFile.getrname()`
-
- """
- def __get__(self): return self._delegate.core.tid
- def __set__(self, tid): self._delegate.core.tid = tid
-
- property reference_start:
- """0-based leftmost coordinate"""
- def __get__(self): return self._delegate.core.pos
- def __set__(self, pos):
- ## setting the position requires updating the "bin" attribute
- cdef bam1_t * src
- src = self._delegate
- src.core.pos = pos
- if pysam_get_n_cigar(src):
- pysam_set_bin(src,
- hts_reg2bin(
- src.core.pos,
- bam_endpos(src),
- 14,
- 5))
- else:
- pysam_set_bin(src,
- hts_reg2bin(
- src.core.pos,
- src.core.pos + 1,
- 14,
- 5))
-
- property mapping_quality:
- """mapping quality"""
- def __get__(self):
- return pysam_get_qual(self._delegate)
- def __set__(self, qual):
- pysam_set_qual(self._delegate, qual)
-
- property cigarstring:
- '''the :term:`cigar` alignment as a string.
-
- The cigar string is a string of alternating integers
- and characters denoting the length and the type of
- an operation.
-
- .. note::
- The order length,operation is specified in the
- SAM format. It is different from the order of
- the :attr:`cigar` property.
-
- Returns None if not present.
-
- To unset the cigarstring, assign None or the
- empty string.
- '''
- def __get__(self):
- c = self.cigartuples
- if c is None:
- return None
- # reverse order
- else:
- return "".join([ "%i%c" % (y,CODE2CIGAR[x]) for x,y in c])
-
- def __set__(self, cigar):
- if cigar is None or len(cigar) == 0:
- self.cigartuples = []
- else:
- parts = CIGAR_REGEX.findall(cigar)
- # reverse order
- self.cigartuples = [(CIGAR2CODE[ord(y)], int(x)) for x,y in parts]
-
- # TODO
- # property cigar:
- # """the cigar alignment"""
-
- property next_reference_id:
- """the :term:`reference` id of the mate/next read."""
- def __get__(self): return self._delegate.core.mtid
- def __set__(self, mtid):
- self._delegate.core.mtid = mtid
-
- property next_reference_start:
- """the position of the mate/next read."""
- def __get__(self):
- return self._delegate.core.mpos
- def __set__(self, mpos):
- self._delegate.core.mpos = mpos
-
- property query_length:
- """the length of the query/read.
-
- This value corresponds to the length of the sequence supplied
- in the BAM/SAM file. The length of a query is 0 if there is no
- sequence in the BAM/SAM file. In those cases, the read length
- can be inferred from the CIGAR alignment, see
- :meth:`pysam.AlignmentFile.infer_query_length.`.
-
- The length includes soft-clipped bases and is equal to
- ``len(query_sequence)``.
-
- This property is read-only but can be set by providing a
- sequence.
-
- Returns 0 if not available.
-
- """
- def __get__(self):
- return self._delegate.core.l_qseq
-
- property template_length:
- """the observed query template length"""
- def __get__(self):
- return self._delegate.core.isize
- def __set__(self, isize):
- self._delegate.core.isize = isize
-
- property query_sequence:
- """read sequence bases, including :term:`soft clipped` bases
- (None if not present).
-
- Note that assigning to seq will invalidate any quality scores.
- Thus, to in-place edit the sequence and quality scores, copies of
- the quality scores need to be taken. Consider trimming for example::
-
- q = read.qual
- read.seq = read.seq[5:10]
- read.qual = q[5:10]
-
- The sequence is returned as it is stored in the BAM file. Some mappers
- might have stored a reverse complement of the original read
- sequence.
- """
- def __get__(self):
- cdef bam1_t * src
- cdef char * s
- src = self._delegate
-
- if src.core.l_qseq == 0: return None
-
- return _getSequenceRange(src, 0, src.core.l_qseq)
-
- def __set__(self, seq):
- # samtools manages sequence and quality length memory together
- # if no quality information is present, the first byte says 0xff.
- cdef bam1_t * src
- cdef uint8_t * p
- cdef char * s
- cdef int l, k, nbytes_new, nbytes_old
-
- if seq == None:
- l = 0
- else:
- l = len(seq)
- seq = _forceBytes(seq)
-
- src = self._delegate
-
- # as the sequence is stored in half-bytes, the total length (sequence
- # plus quality scores) is (l+1)/2 + l
- nbytes_new = (l + 1) / 2 + l
- nbytes_old = (src.core.l_qseq + 1) / 2 + src.core.l_qseq
-
- # acquire pointer to location in memory
- p = pysam_bam_get_seq(src)
- src.core.l_qseq = l
-
- # change length of data field
- pysam_bam_update(src,
- nbytes_old,
- nbytes_new,
- p)
-
- if l > 0:
- # re-acquire pointer to location in memory
- # as it might have moved
- p = pysam_bam_get_seq(src)
- for k from 0 <= k < nbytes_new:
- p[k] = 0
- # convert to C string
- s = seq
- for k from 0 <= k < l:
- p[k/2] |= seq_nt16_table[<unsigned char>s[k]] << 4 * (1 - k % 2)
-
- # erase qualities
- p = pysam_bam_get_qual(src)
- p[0] = 0xff
-
- property query_qualities:
- """read sequence base qualities, including :term:`soft
- clipped` bases (None if not present).
-
- Quality scores are returned as a python array of unsigned
- chars. Note that this is not the ASCII-encoded value typically
- seen in FASTQ or SAM formatted files. Thus, no offset of 33
- needs to be subtracted.
-
- Note that to set quality scores the sequence has to be set
- beforehand as this will determine the expected length of the
- quality score array.
-
- This method raises a ValueError if the length of the
- quality scores and the sequence are not the same.
-
- """
- def __get__(self):
-
- cdef bam1_t * src
- cdef char * q
-
- src = self._delegate
-
- if src.core.l_qseq == 0:
- return None
-
- return _getQualitiesRange(src, 0, src.core.l_qseq)
-
- def __set__(self, qual):
- # note that memory is already allocated via setting the sequence
- # hence length match of sequence and quality needs is checked.
- cdef bam1_t * src
- cdef uint8_t * p
- cdef int l
-
- src = self._delegate
- p = pysam_bam_get_qual(src)
- if qual is None or len(qual) == 0:
- # if absent and there is a sequence: set to 0xff
- if src.core.l_qseq != 0:
- p[0] = 0xff
- return
-
- # check for length match
- l = len(qual)
- if src.core.l_qseq != l:
- raise ValueError(
- "quality and sequence mismatch: %i != %i" %
- (l, src.core.l_qseq))
-
- # create a python array object filling it
- # with the quality scores
-
- # NB: should avoid this copying if qual is
- # already of the correct type.
- cdef array.array result = array.array('B', qual)
-
- # copy data
- memcpy(p, result.data.as_voidptr, l)
-
-
- property bin:
- """properties bin"""
- def __get__(self):
- return pysam_get_bin(self._delegate)
- def __set__(self, bin):
- pysam_set_bin(self._delegate, bin)
-
-
- ##########################################################
- # Derived simple attributes. These are simple attributes of
- # AlignedSegment getting and setting values.
- ##########################################################
- # 1. Flags
- ##########################################################
- property is_paired:
- """true if read is paired in sequencing"""
- def __get__(self):
- return (self.flag & BAM_FPAIRED) != 0
- def __set__(self,val):
- pysam_update_flag(self._delegate, val, BAM_FPAIRED)
-
- property is_proper_pair:
- """true if read is mapped in a proper pair"""
- def __get__(self):
- return (self.flag & BAM_FPROPER_PAIR) != 0
- def __set__(self,val):
- pysam_update_flag(self._delegate, val, BAM_FPROPER_PAIR)
- property is_unmapped:
- """true if read itself is unmapped"""
- def __get__(self):
- return (self.flag & BAM_FUNMAP) != 0
- def __set__(self, val):
- pysam_update_flag(self._delegate, val, BAM_FUNMAP)
- property mate_is_unmapped:
- """true if the mate is unmapped"""
- def __get__(self):
- return (self.flag & BAM_FMUNMAP) != 0
- def __set__(self,val):
- pysam_update_flag(self._delegate, val, BAM_FMUNMAP)
- property is_reverse:
- """true if read is mapped to reverse strand"""
- def __get__(self):
- return (self.flag & BAM_FREVERSE) != 0
- def __set__(self,val):
- pysam_update_flag(self._delegate, val, BAM_FREVERSE)
- property mate_is_reverse:
- """true is read is mapped to reverse strand"""
- def __get__(self):
- return (self.flag & BAM_FMREVERSE) != 0
- def __set__(self,val):
- pysam_update_flag(self._delegate, val, BAM_FMREVERSE)
- property is_read1:
- """true if this is read1"""
- def __get__(self):
- return (self.flag & BAM_FREAD1) != 0
- def __set__(self,val):
- pysam_update_flag(self._delegate, val, BAM_FREAD1)
- property is_read2:
- """true if this is read2"""
- def __get__(self):
- return (self.flag & BAM_FREAD2) != 0
- def __set__(self, val):
- pysam_update_flag(self._delegate, val, BAM_FREAD2)
- property is_secondary:
- """true if not primary alignment"""
- def __get__(self):
- return (self.flag & BAM_FSECONDARY) != 0
- def __set__(self, val):
- pysam_update_flag(self._delegate, val, BAM_FSECONDARY)
- property is_qcfail:
- """true if QC failure"""
- def __get__(self):
- return (self.flag & BAM_FQCFAIL) != 0
- def __set__(self, val):
- pysam_update_flag(self._delegate, val, BAM_FQCFAIL)
- property is_duplicate:
- """true if optical or PCR duplicate"""
- def __get__(self):
- return (self.flag & BAM_FDUP) != 0
- def __set__(self, val):
- pysam_update_flag(self._delegate, val, BAM_FDUP)
- property is_supplementary:
- """true if this is a supplementary alignment"""
- def __get__(self):
- return (self.flag & BAM_FSUPPLEMENTARY) != 0
- def __set__(self, val):
- pysam_update_flag(self._delegate, val, BAM_FSUPPLEMENTARY)
-
- # 2. Coordinates and lengths
- property reference_end:
- '''aligned reference position of the read on the reference genome.
-
- reference_end points to one past the last aligned residue.
- Returns None if not available (read is unmapped or no cigar
- alignment present).
-
- '''
- def __get__(self):
- cdef bam1_t * src
- src = self._delegate
- if (self.flag & BAM_FUNMAP) or pysam_get_n_cigar(src) == 0:
- return None
- return bam_endpos(src)
-
- property reference_length:
- '''aligned length of the read on the reference genome.
-
- This is equal to `aend - pos`. Returns None if not available.'''
- def __get__(self):
- cdef bam1_t * src
- src = self._delegate
- if (self.flag & BAM_FUNMAP) or pysam_get_n_cigar(src) == 0:
- return None
- return bam_endpos(src) - \
- self._delegate.core.pos
-
- property query_alignment_sequence:
- """aligned portion of the read.
-
- This is a substring of :attr:`seq` that excludes flanking
- bases that were :term:`soft clipped` (None if not present). It
- is equal to ``seq[qstart:qend]``.
-
- SAM/BAM files may include extra flanking bases that are not
- part of the alignment. These bases may be the result of the
- Smith-Waterman or other algorithms, which may not require
- alignments that begin at the first residue or end at the last.
- In addition, extra sequencing adapters, multiplex identifiers,
- and low-quality bases that were not considered for alignment
- may have been retained.
-
- """
-
- def __get__(self):
- cdef bam1_t * src
- cdef uint32_t start, end
-
- src = self._delegate
-
- if src.core.l_qseq == 0:
- return None
-
- start = _getQueryStart(src)
- end = _getQueryEnd(src)
-
- return _getSequenceRange(src, start, end)
-
- property query_alignment_qualities:
- """aligned query sequence quality values (None if not present). These
- are the quality values that correspond to :attr:`query`, that
- is, they exclude qualities of :term:`soft clipped` bases. This
- is equal to ``qual[qstart:qend]``.
-
- Quality scores are returned as a python array of unsigned
- chars. Note that this is not the ASCII-encoded value typically
- seen in FASTQ or SAM formatted files. Thus, no offset of 33
- needs to be subtracted.
-
- This property is read-only.
-
- """
- def __get__(self):
- cdef bam1_t * src
- cdef uint32_t start, end
-
- src = self._delegate
-
- if src.core.l_qseq == 0:
- return None
-
- start = _getQueryStart(src)
- end = _getQueryEnd(src)
-
- return _getQualitiesRange(src, start, end)
-
- property query_alignment_start:
- """start index of the aligned query portion of the sequence (0-based,
- inclusive).
-
- This the index of the first base in :attr:`seq` that is not
- soft-clipped.
-
- """
- def __get__(self):
- return _getQueryStart(self._delegate)
-
- property query_alignment_end:
- """end index of the aligned query portion of the sequence (0-based,
- exclusive)"""
- def __get__(self):
- return _getQueryEnd(self._delegate)
-
- property query_alignment_length:
- """length of the aligned query sequence.
-
- This is equal to :attr:`qend` - :attr:`qstart`"""
- def __get__(self):
- cdef bam1_t * src
- src = self._delegate
- return _getQueryEnd(src) - _getQueryStart(src)
-
- #####################################################
- # Computed properties
-
- def get_reference_positions(self, full_length=False):
- """a list of reference positions that this read aligns to.
-
- By default, this method only returns positions in the
- reference that are within the alignment. If *full_length* is
- set, None values will be included for any soft-clipped or
- unaligned positions within the read. The returned list will
- thus be of the same length as the read.
-
- """
- cdef uint32_t k, i, pos
- cdef int op
- cdef uint32_t * cigar_p
- cdef bam1_t * src
- cdef bint _full = full_length
-
- src = self._delegate
- if pysam_get_n_cigar(src) == 0:
- return []
-
- result = []
- pos = src.core.pos
- cigar_p = pysam_bam_get_cigar(src)
-
- for k from 0 <= k < pysam_get_n_cigar(src):
- op = cigar_p[k] & BAM_CIGAR_MASK
- l = cigar_p[k] >> BAM_CIGAR_SHIFT
-
- if op == BAM_CSOFT_CLIP or op == BAM_CINS:
- if _full:
- for i from 0 <= i < l:
- result.append(None)
- elif op == BAM_CMATCH:
- for i from pos <= i < pos + l:
- result.append(i)
- pos += l
- elif op == BAM_CDEL or op == BAM_CREF_SKIP:
- pos += l
-
- return result
-
- def infer_query_length(self, always=True):
- """inferred read length from CIGAR string.
-
- If *always* is set to True, the read length
- will be always inferred. If set to False, the length
- of the read sequence will be returned if it is
- available.
-
- Returns None if CIGAR string is not present.
- """
- cdef uint32_t k, qpos
- cdef int op
- cdef uint32_t * cigar_p
- cdef bam1_t * src
-
- src = self._delegate
-
- if not always and src.core.l_qseq:
- return src.core.l_qseq
-
- if pysam_get_n_cigar(src) == 0:
- return None
-
- qpos = 0
- cigar_p = pysam_bam_get_cigar(src)
-
- for k from 0 <= k < pysam_get_n_cigar(src):
- op = cigar_p[k] & BAM_CIGAR_MASK
-
- if op == BAM_CMATCH or op == BAM_CINS or \
- op == BAM_CSOFT_CLIP or \
- op == BAM_CEQUAL or op == BAM_CDIFF:
- qpos += cigar_p[k] >> BAM_CIGAR_SHIFT
-
- return qpos
-
- def get_aligned_pairs(self, matches_only = False):
- """a list of aligned read (query) and reference positions.
- For inserts, deletions, skipping either query or reference position may be None.
-
- If @matches_only is True, only matched bases are returned - no None on either side.
-
- Padding is currently not supported and leads to an exception
-
- """
- cdef uint32_t k, i, pos, qpos
- cdef int op
- cdef uint32_t * cigar_p
- cdef bam1_t * src
- cdef int _matches_only
-
- _matches_only = bool(matches_only)
-
- src = self._delegate
- if pysam_get_n_cigar(src) == 0:
- return []
-
- result = []
- pos = src.core.pos
- qpos = 0
- cigar_p = pysam_bam_get_cigar(src)
-
- for k from 0 <= k < pysam_get_n_cigar(src):
- op = cigar_p[k] & BAM_CIGAR_MASK
- l = cigar_p[k] >> BAM_CIGAR_SHIFT
-
- if op == BAM_CMATCH or op == BAM_CEQUAL or op == BAM_CDIFF:
- for i from pos <= i < pos + l:
- result.append((qpos, i))
- qpos += 1
- pos += l
-
- elif op == BAM_CINS or op == BAM_CSOFT_CLIP:
- if not _matches_only:
- for i from pos <= i < pos + l:
- result.append((qpos, None))
- qpos += 1
- else:
- qpos += l
-
- elif op == BAM_CDEL or op == BAM_CREF_SKIP:
- if not _matches_only:
- for i from pos <= i < pos + l:
- result.append((None, i))
- pos += l
-
- elif op == BAM_CHARD_CLIP:
- pass # advances neither
-
- elif op == BAM_CPAD:
- raise NotImplementedError("Padding (BAM_CPAD, 6) is currently not supported. Please implement. Sorry about that.")
-
- return result
-
- def get_blocks(self):
- """ a list of start and end positions of
- aligned gapless blocks.
-
- The start and end positions are in genomic
- coordinates.
-
- Blocks are not normalized, i.e. two blocks
- might be directly adjacent. This happens if
- the two blocks are separated by an insertion
- in the read.
- """
-
- cdef uint32_t k, pos, l
- cdef int op
- cdef uint32_t * cigar_p
- cdef bam1_t * src
-
- src = self._delegate
- if pysam_get_n_cigar(src) == 0:
- return []
-
- result = []
- pos = src.core.pos
- cigar_p = pysam_bam_get_cigar(src)
- l = 0
-
- for k from 0 <= k < pysam_get_n_cigar(src):
- op = cigar_p[k] & BAM_CIGAR_MASK
- l = cigar_p[k] >> BAM_CIGAR_SHIFT
- if op == BAM_CMATCH:
- result.append((pos, pos + l))
- pos += l
- elif op == BAM_CDEL or op == BAM_CREF_SKIP:
- pos += l
-
- return result
-
- def get_overlap(self, uint32_t start, uint32_t end):
- """return number of aligned bases of read overlapping the interval
- *start* and *end* on the reference sequence.
-
- Return None if cigar alignment is not available.
- """
- cdef uint32_t k, i, pos, overlap
- cdef int op, o
- cdef uint32_t * cigar_p
- cdef bam1_t * src
-
- overlap = 0
-
- src = self._delegate
- if pysam_get_n_cigar(src) == 0:
- return None
- pos = src.core.pos
- o = 0
-
- cigar_p = pysam_bam_get_cigar(src)
- for k from 0 <= k < pysam_get_n_cigar(src):
- op = cigar_p[k] & BAM_CIGAR_MASK
- l = cigar_p[k] >> BAM_CIGAR_SHIFT
-
- if op == BAM_CMATCH:
- o = min( pos + l, end) - max( pos, start )
- if o > 0: overlap += o
-
- if op == BAM_CMATCH or op == BAM_CDEL or op == BAM_CREF_SKIP:
- pos += l
-
- return overlap
-
- #####################################################
- ## Unsorted as yet
- # TODO: capture in CIGAR object
- property cigartuples:
- """the :term:`cigar` alignment. The alignment
- is returned as a list of tuples of (operation, length).
-
- If the alignment is not present, None is returned.
-
- The operations are:
-
- +-----+--------------+-----+
- |M |BAM_CMATCH |0 |
- +-----+--------------+-----+
- |I |BAM_CINS |1 |
- +-----+--------------+-----+
- |D |BAM_CDEL |2 |
- +-----+--------------+-----+
- |N |BAM_CREF_SKIP |3 |
- +-----+--------------+-----+
- |S |BAM_CSOFT_CLIP|4 |
- +-----+--------------+-----+
- |H |BAM_CHARD_CLIP|5 |
- +-----+--------------+-----+
- |P |BAM_CPAD |6 |
- +-----+--------------+-----+
- |= |BAM_CEQUAL |7 |
- +-----+--------------+-----+
- |X |BAM_CDIFF |8 |
- +-----+--------------+-----+
-
- .. note::
- The output is a list of (operation, length) tuples, such as
- ``[(0, 30)]``.
- This is different from the SAM specification and
- the :attr:`cigarstring` property, which uses a
- (length, operation) order, for example: ``30M``.
-
- To unset the cigar property, assign an empty list
- or None.
- """
- def __get__(self):
- cdef uint32_t * cigar_p
- cdef bam1_t * src
- cdef uint32_t op, l
- cdef int k
-
- src = self._delegate
- if pysam_get_n_cigar(src) == 0:
- return None
-
- cigar = []
-
- cigar_p = pysam_bam_get_cigar(src);
- for k from 0 <= k < pysam_get_n_cigar(src):
- op = cigar_p[k] & BAM_CIGAR_MASK
- l = cigar_p[k] >> BAM_CIGAR_SHIFT
- cigar.append((op, l))
- return cigar
-
- def __set__(self, values):
- cdef uint32_t * p
- cdef bam1_t * src
- cdef op, l
- cdef int k, ncigar
-
- k = 0
-
- src = self._delegate
-
- # get location of cigar string
- p = pysam_bam_get_cigar(src)
-
- # empty values for cigar string
- if values is None:
- values = []
-
- ncigar = len(values)
- # create space for cigar data within src.data
- pysam_bam_update(src,
- pysam_get_n_cigar(src) * 4,
- ncigar * 4,
- <uint8_t*>p)
-
- # length is number of cigar operations, not bytes
- pysam_set_n_cigar(src, ncigar)
-
- # re-acquire pointer to location in memory
- # as it might have moved
- p = pysam_bam_get_cigar(src)
-
- # insert cigar operations
- for op, l in values:
- p[k] = l << BAM_CIGAR_SHIFT | op
- k += 1
-
- ## setting the cigar string requires updating the bin
- pysam_set_bin(src,
- hts_reg2bin(
- src.core.pos,
- bam_endpos(src),
- 14,
- 5))
-
-
- cpdef set_tag(self,
- tag,
- value,
- value_type=None,
- replace=True):
- """sets a particular field *tag* to *value* in the optional alignment
- section.
-
- *value_type* describes the type of *value* that is to entered
- into the alignment record.. It can be set explicitely to one
- of the valid one-letter type codes. If unset, an appropriate
- type will be chosen automatically.
-
- An existing value of the same *tag* will be overwritten unless
- replace is set to False. This is usually not recommened as a
- tag may only appear once in the optional alignment section.
-
- If *value* is None, the tag will be deleted.
- """
-
- cdef int value_size
- cdef uint8_t * value_ptr
- cdef uint8_t *existing_ptr
- cdef uint8_t type_code
- cdef float float_value
- cdef double double_value
- cdef int32_t int_value
- cdef bam1_t * src = self._delegate
- cdef char * _value_type
-
- if len(tag) != 2:
- raise ValueError('Invalid tag: %s' % tag)
-
- tag = _forceBytes(tag)
- if replace:
- existing_ptr = bam_aux_get(src, tag)
- if existing_ptr:
- bam_aux_del(src, existing_ptr)
-
- # setting value to None deletes a tag
- if value is None:
- return
-
- type_code = _get_value_code(value, value_type)
- if type_code == 0:
- raise ValueError("can't guess type or invalid type code specified")
-
- # Not Endian-safe, but then again neither is samtools!
- if type_code == 'Z':
- value = _forceBytes(value)
- value_ptr = <uint8_t*><char*>value
- value_size = len(value)+1
- elif type_code == 'i':
- int_value = value
- value_ptr = <uint8_t*>&int_value
- value_size = sizeof(int32_t)
- elif type_code == 'd':
- double_value = value
- value_ptr = <uint8_t*>&double_value
- value_size = sizeof(double)
- elif type_code == 'f':
- float_value = value
- value_ptr = <uint8_t*>&float_value
- value_size = sizeof(float)
- else:
- raise ValueError('Unsupported value_type in set_option')
-
-
- bam_aux_append(src,
- tag,
- type_code,
- value_size,
- value_ptr)
-
- cpdef has_tag(self, tag):
- """returns true if the optional alignment section
- contains a given *tag*."""
- cdef uint8_t * v
- cdef int nvalues
- btag = _forceBytes(tag)
- v = bam_aux_get(self._delegate, btag)
- return v != NULL
-
- cpdef get_tag(self, tag):
- """retrieves data from the optional alignment section
- given a two-letter *tag* denoting the field.
-
- If *tag* is not present, a KeyError is raised.
-
- The returned value is cast into an appropriate python type.
-
- This method is the fastest way to access the optional
- alignment section if only few tags need to be retrieved.
- """
- cdef uint8_t * v
- cdef int nvalues
- btag = _forceBytes(tag)
- v = bam_aux_get(self._delegate, btag)
- if v == NULL:
- raise KeyError("tag '%s' not present" % tag)
- auxtype = chr(v[0])
- if auxtype == 'c' or auxtype == 'C' or auxtype == 's' or auxtype == 'S':
- return <int>bam_aux2i(v)
- elif auxtype == 'i' or auxtype == 'I':
- return <int32_t>bam_aux2i(v)
- elif auxtype == 'f' or auxtype == 'F':
- return <float>bam_aux2f(v)
- elif auxtype == 'd' or auxtype == 'D':
- return <double>bam_aux2f(v)
- elif auxtype == 'A':
- # there might a more efficient way
- # to convert a char into a string
- return '%c' % <char>bam_aux2A(v)
- elif auxtype == 'Z':
- return _charptr_to_str(<char*>bam_aux2Z(v))
- elif auxtype == 'B':
- bytesize, nvalues, values = convertBinaryTagToList(v + 1)
- return values
- else:
- raise ValueError("unknown auxilliary type '%s'" % auxtype)
-
- def get_tags(self, with_value_type=False):
- """the fields in the optional aligment section.
-
- Returns a list of all fields in the optional
- alignment section. Values are converted to appropriate python
- values. For example:
-
- [(NM, 2), (RG, "GJP00TM04")]
-
- If *with_value_type* is set, the value type as encode in
- the AlignedSegment record will be returned as well:
-
- [(NM, 2, "i"), (RG, "GJP00TM04", "Z")]
-
- This method will convert all values in the optional alignment
- section. When getting only one or few tags, please see
- :meth:`get_tag` for a quicker way to achieve this.
-
- """
-
- cdef char * ctag
- cdef bam1_t * src
- cdef uint8_t * s
- cdef char auxtag[3]
- cdef char auxtype
- cdef uint8_t byte_size
- cdef int32_t nvalues
-
- src = self._delegate
- if src.l_data == 0:
- return []
- s = pysam_bam_get_aux(src)
- result = []
- auxtag[2] = 0
- while s < (src.data + src.l_data):
- # get tag
- auxtag[0] = s[0]
- auxtag[1] = s[1]
- s += 2
- auxtype = s[0]
- if auxtype in ('c', 'C'):
- value = <int>bam_aux2i(s)
- s += 1
- elif auxtype in ('s', 'S'):
- value = <int>bam_aux2i(s)
- s += 2
- elif auxtype in ('i', 'I'):
- value = <int32_t>bam_aux2i(s)
- s += 4
- elif auxtype == 'f':
- value = <float>bam_aux2f(s)
- s += 4
- elif auxtype == 'd':
- value = <double>bam_aux2f(s)
- s += 8
- elif auxtype == 'A':
- value = "%c" % <char>bam_aux2A(s)
- s += 1
- elif auxtype in ('Z', 'H'):
- value = _charptr_to_str(<char*>bam_aux2Z(s))
- # +1 for NULL terminated string
- s += len(value) + 1
- elif auxtype == 'B':
- s += 1
- byte_size, nvalues, value = convertBinaryTagToList(s)
- # 5 for 1 char and 1 int
- s += 5 + (nvalues * byte_size) - 1
- else:
- raise KeyError("unknown type '%s'" % auxtype)
-
- s += 1
-
- result.append((_charptr_to_str(auxtag), value))
-
- return result
-
- def set_tags(self, tags):
- """sets the fields in the optional alignmest section with
- a list of (tag, value) tuples.
-
- The :term:`value type` of the values is determined from the
- python type. Optionally, a type may be given explicitely as
- a third value in the tuple, For example:
-
- x.set_tags([(NM, 2, "i"), (RG, "GJP00TM04", "Z")]
-
- This method will not enforce the rule that the same tag may appear
- only once in the optional alignment section.
- """
-
- cdef bam1_t * src
- cdef uint8_t * s
- cdef char * temp
- cdef int new_size = 0
- cdef int old_size
- src = self._delegate
-
- # convert and pack the data
- if tags is not None and len(tags) > 0:
- fmt, args =_pack_tags(tags)
- new_size = struct.calcsize(fmt)
- buffer = ctypes.create_string_buffer(new_size)
- struct.pack_into(fmt,
- buffer,
- 0,
- *args)
-
- # delete the old data and allocate new space.
- # If total_size == 0, the aux field will be
- # empty
- old_size = pysam_bam_get_l_aux(src)
- pysam_bam_update(src,
- old_size,
- new_size,
- pysam_bam_get_aux(src))
-
- # copy data only if there is any
- if new_size > 0:
-
- # get location of new data
- s = pysam_bam_get_aux(src)
-
- # check if there is direct path from buffer.raw to tmp
- p = buffer.raw
- # create handle to make sure buffer stays alive long
- # enough for memcpy, see issue 129
- temp = p
- memcpy(s, temp, new_size)
-
-
- ########################################################
- # Compatibility Accessors
- # Functions, properties for compatibility with pysam < 0.8
- #
- # Several options
- # change the factory functions according to API
- # * requires code changes throughout, incl passing
- # handles to factory functions
- # subclass functions and add attributes at runtime
- # e.g.: AlignedSegments.qname = AlignedSegments.query_name
- # * will slow down the default interface
- # explicit declaration of getters/setters
- ########################################################
- property qname:
- def __get__(self): return self.query_name
- def __set__(self, v): self.query_name = v
- property tid:
- def __get__(self): return self.reference_id
- def __set__(self, v): self.reference_id = v
- property pos:
- def __get__(self): return self.reference_start
- def __set__(self, v): self.reference_start = v
- property mapq:
- def __get__(self): return self.mapping_quality
- def __set__(self, v): self.mapping_quality = v
- property rnext:
- def __get__(self): return self.next_reference_id
- def __set__(self, v): self.next_reference_id = v
- property pnext:
- def __get__(self):
- return self.next_reference_start
- def __set__(self, v):
- self.next_reference_start = v
- property cigar:
- def __get__(self):
- r = self.cigartuples
- if r is None:
- r = []
- return r
- def __set__(self, v): self.cigartuples = v
- property tlen:
- def __get__(self):
- return self.template_length
- def __set__(self, v):
- self.template_length = v
- property seq:
- def __get__(self): return self.query_sequence
- def __set__(self, v): self.query_sequence = v
- property qual:
- def __get__(self):
- return toQualityString(self.query_qualities)
- def __set__(self, v):
- self.query_qualities = fromQualityString(v)
- property alen:
- def __get__(self):
- return self.reference_length
- def __set__(self, v):
- self.reference_length = v
- property aend:
- def __get__(self):
- return self.reference_end
- def __set__(self, v):
- self.reference_end = v
- property rlen:
- def __get__(self):
- return self.query_length
- def __set__(self, v):
- self.query_length = v
- property query:
- def __get__(self):
- return self.query_alignment_sequence
- def __set__(self, v):
- self.query_alignment_sequence = v
- property qqual:
- def __get__(self):
- return toQualityString(self.query_alignment_qualities)
- def __set__(self, v):
- self.query_alignment_qualities = fromQualityString(v)
- property qstart:
- def __get__(self):
- return self.query_alignment_start
- def __set__(self, v):
- self.query_alignment_start = v
- property qend:
- def __get__(self):
- return self.query_alignment_end
- def __set__(self, v):
- self.query_alignment_end = v
- property qlen:
- def __get__(self):
- return self.query_alignment_length
- def __set__(self, v):
- self.query_alignment_length = v
- property mrnm:
- def __get__(self):
- return self.next_reference_id
- def __set__(self, v):
- self.next_reference_id = v
- property mpos:
- def __get__(self):
- return self.next_reference_start
- def __set__(self, v):
- self.next_reference_start = v
- property rname:
- def __get__(self):
- return self.reference_id
- def __set__(self, v):
- self.reference_id = v
- property isize:
- def __get__(self):
- return self.template_length
- def __set__(self, v):
- self.template_length = v
- property blocks:
- def __get__(self):
- return self.get_blocks()
- property aligned_pairs:
- def __get__(self):
- return self.get_aligned_pairs()
- property inferred_length:
- def __get__(self):
- return self.infer_query_length()
- property positions:
- def __get__(self):
- return self.get_reference_positions()
- property tags:
- def __get__(self):
- return self.get_tags()
- def __set__(self, tags):
- self.set_tags(tags)
- def overlap(self):
- return self.get_overlap()
- def opt(self, tag):
- return self.get_tag(tag)
- def setTag(self, tag, value, value_type=None, replace=True):
- return self.set_tag(tag, value, value_type, replace)
-
-
-cdef class PileupColumn:
- '''A pileup of reads at a particular reference sequence postion
- (:term:`column`). A pileup column contains all the reads that map
- to a certain target base.
-
- This class is a proxy for results returned by the samtools pileup
- engine. If the underlying engine iterator advances, the results
- of this column will change.
-
- '''
- def __init__(self):
- raise TypeError("this class cannot be instantiated from Python")
-
- def __str__(self):
- return "\t".join(map(str,
- (self.reference_id,
- self.reference_pos,
- self.nsegments))) +\
- "\n" +\
- "\n".join(map(str, self.pileups))
-
- property reference_id:
- '''the reference sequence number as defined in the header'''
- def __get__(self):
- return self.tid
-
- property nsegments:
- '''number of reads mapping to this column.'''
- def __get__(self):
- return self.n_pu
- def __set__(self, n):
- self.n_pu = n
-
- property reference_pos:
- '''the position in the reference sequence (0-based).'''
- def __get__(self):
- return self.pos
-
- property pileups:
- '''list of reads (:class:`pysam.PileupRead`) aligned to this column'''
- def __get__(self):
- cdef int x
- pileups = []
-
- if self.plp == NULL or self.plp[0] == NULL:
- raise ValueError("PileupColumn accessed after iterator finished")
-
- # warning: there could be problems if self.n and self.buf are
- # out of sync.
- for x from 0 <= x < self.n_pu:
- pileups.append(makePileupRead(&(self.plp[0][x])))
- return pileups
-
- ########################################################
- # Compatibility Accessors
- # Functions, properties for compatibility with pysam < 0.8
- ########################################################
- property pos:
- def __get__(self):
- return self.reference_pos
- def __set__(self, v):
- self.reference_pos = v
-
- property tid:
- def __get__(self):
- return self.reference_id
- def __set__(self, v):
- self.reference_id = v
-
- property n:
- def __get__(self):
- return self.nsegments
- def __set__(self, v):
- self.nsegments = v
-
-
-cdef class PileupRead:
- '''Representation of a read aligned to a particular position in the
- reference sequence.
-
- '''
-
- def __init__(self):
- raise TypeError(
- "this class cannot be instantiated from Python")
-
- def __str__(self):
- return "\t".join(
- map(str,
- (self.alignment, self.query_position,
- self.indel, self.level,
- self.is_del, self.is_head,
- self.is_tail, self.is_refskip)))
-
- property alignment:
- """a :class:`pysam.AlignedSegment` object of the aligned read"""
- def __get__(self):
- return self._alignment
-
- property query_position:
- """position of the read base at the pileup site, 0-based.
- None if is_del or is_refskip is set.
-
- """
- def __get__(self):
- if self.is_del or self.is_refskip:
- return None
- else:
- return self._qpos
-
- property indel:
- """indel length; 0 for no indel, positive for ins and negative for del"""
- def __get__(self):
- return self._indel
-
- property level:
- """the level of the read in the "viewer" mode"""
- def __get__(self):
- return self._level
-
- property is_del:
- """1 iff the base on the padded read is a deletion"""
- def __get__(self):
- return self._is_del
-
- property is_head:
- def __get__(self):
- return self._is_head
-
- property is_tail:
- def __get__(self):
- return self._is_tail
-
- property is_refskip:
- def __get__(self):
- return self._is_refskip
-
-
-cdef class SNPCall:
- '''the results of a SNP call.'''
- cdef int _tid
- cdef int _pos
- cdef char _reference_base
- cdef char _genotype
- cdef int _consensus_quality
- cdef int _snp_quality
- cdef int _rms_mapping_quality
- cdef int _coverage
-
- property tid:
- '''the chromosome ID as is defined in the header'''
- def __get__(self):
- return self._tid
-
- property pos:
- '''nucleotide position of SNP.'''
- def __get__(self): return self._pos
-
- property reference_base:
- '''reference base at pos. ``N`` if no reference sequence supplied.'''
- def __get__(self): return from_string_and_size( &self._reference_base, 1 )
-
- property genotype:
- '''the genotype called.'''
- def __get__(self): return from_string_and_size( &self._genotype, 1 )
-
- property consensus_quality:
- '''the genotype quality (Phred-scaled).'''
- def __get__(self): return self._consensus_quality
-
- property snp_quality:
- '''the snp quality (Phred scaled) - probability of consensus being
- identical to reference sequence.'''
- def __get__(self): return self._snp_quality
-
- property mapping_quality:
- '''the root mean square (rms) of the mapping quality of all reads
- involved in the call.'''
- def __get__(self): return self._rms_mapping_quality
-
- property coverage:
- '''coverage or read depth - the number of reads involved in the call.'''
- def __get__(self): return self._coverage
-
- def __str__(self):
-
- return "\t".join( map(str, (
- self.tid,
- self.pos,
- self.reference_base,
- self.genotype,
- self.consensus_quality,
- self.snp_quality,
- self.mapping_quality,
- self.coverage ) ) )
-
-
-cdef class IndexedReads:
- """index a Sam/BAM-file by query name.
-
- The index is kept in memory and can be substantial.
-
- By default, the file is re-openend to avoid conflicts if multiple
- operators work on the same file. Set *multiple_iterators* = False
- to not re-open *samfile*.
- """
-
- def __init__(self, AlignmentFile samfile, int multiple_iterators=True):
-
- # makes sure that samfile stays alive as long as this
- # object is alive.
- self.samfile = samfile
-
- assert samfile.is_bam, "can only IndexReads on bam files"
-
- # multiple_iterators the file - note that this makes the iterator
- # slow and causes pileup to slow down significantly.
- if multiple_iterators:
- self.htsfile = hts_open(samfile._filename, 'r')
- assert self.htsfile != NULL
- # read header - required for accurate positioning
- self.header = sam_hdr_read(self.htsfile)
- self.owns_samfile = True
- else:
- self.htsfile = self.samfile.htsfile
- self.header = self.samfile.header
- self.owns_samfile = False
-
- def build(self):
- '''build index.'''
-
- self.index = collections.defaultdict(list)
-
- # this method will start indexing from the current file
- # position if you decide
- cdef int ret = 1
- cdef bam1_t * b = <bam1_t*>calloc(1, sizeof( bam1_t))
-
- cdef uint64_t pos
+ cdef uint64_t pos
while ret > 0:
- pos = bgzf_tell(hts_get_bgzfp(self.htsfile))
- ret = sam_read1(self.htsfile,
- self.samfile.header,
- b)
+ with nogil:
+ pos = bgzf_tell(hts_get_bgzfp(self.htsfile))
+ ret = sam_read1(self.htsfile,
+ self.samfile.header,
+ b)
if ret > 0:
- qname = _charptr_to_str(pysam_bam_get_qname(b))
+ qname = charptr_to_str(pysam_bam_get_qname(b))
self.index[qname].append(pos)
bam_destroy1(b)
def find(self, query_name):
- '''find *query_name* in index.
+ '''find `query_name` in index.
- Returns an iterator over all reads with query_name.
+ Returns
+ -------
+
+ IteratorRowSelection
+ Returns an iterator over all reads with query_name.
+
+ Raises
+ ------
+
+ KeyError
+ if the `query_name` is not in the index.
- Raise a KeyError if the *query_name* is not in the index.
'''
if query_name in self.index:
return IteratorRowSelection(
@@ -3962,31 +2428,8 @@ cdef class IndexedReads:
hts_close(self.htsfile)
bam_hdr_destroy(self.header)
-cpdef set_verbosity(int verbosity):
- u"""Set htslib's hts_verbose global variable to the specified value.
- """
- return hts_set_verbosity(verbosity)
-
-cpdef get_verbosity():
- u"""Return the value of htslib's hts_verbose global variable.
- """
- return hts_get_verbosity()
-
-__all__ = ["AlignmentFile",
- "IteratorRow",
- "IteratorColumn",
- "AlignedSegment",
- "PileupColumn",
- "PileupRead",
- "IndexedReads",
- "toQualityString",
- "fromQualityString",
- "get_verbosity",
- "set_verbosity"]
- # "IteratorSNPCalls",
- # "SNPCaller",
- # "IndelCaller",
- # "IteratorIndelCalls",
-
-
-
+__all__ = [
+ "AlignmentFile",
+ "IteratorRow",
+ "IteratorColumn",
+ "IndexedReads"]
diff --git a/pysam/cbcf.pxd b/pysam/cbcf.pxd
index 83e628a..b56f7ed 100644
--- a/pysam/cbcf.pxd
+++ b/pysam/cbcf.pxd
@@ -41,7 +41,7 @@ from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t
from libc.stdlib cimport malloc, calloc, realloc, free
from libc.string cimport memcpy, memcmp, strncpy, strlen, strdup
-from chtslib cimport *
+from pysam.chtslib cimport *
cdef class VariantHeader(object):
diff --git a/pysam/cbcf.pyx b/pysam/cbcf.pyx
index ae274d5..4882503 100644
--- a/pysam/cbcf.pyx
+++ b/pysam/cbcf.pyx
@@ -76,7 +76,7 @@
# fetch(contig=None, start=None, stop=None, region=None, reopen=False)
# subset_samples(include_samples)
#
-# VariantHeader(mode) # mode='r' for reading, mode='w' for writing
+# VariantHeader()
#
# version: VCF version
# samples: sequence-like access to samples
@@ -188,7 +188,7 @@ from __future__ import division, print_function
import os
import sys
-from libc.string cimport strcmp
+from libc.string cimport strcmp, strpbrk
cimport cython
@@ -217,62 +217,8 @@ cdef tuple COMPRESSION = ('NONE', 'GZIP', 'BGZF', 'CUSTOM')
## Python 3 compatibility functions
########################################################################
-IS_PYTHON3 = PY_MAJOR_VERSION >= 3
-
-
-# filename encoding (copied from lxml.etree.pyx)
-cdef str FILENAME_ENCODING
-FILENAME_ENCODING = sys.getfilesystemencoding()
-if FILENAME_ENCODING is None:
- FILENAME_ENCODING = sys.getdefaultencoding()
-if FILENAME_ENCODING is None:
- FILENAME_ENCODING = 'ascii'
-
-
-cdef bytes encode_filename(object filename):
- """Make sure a filename is 8-bit encoded (or None)."""
- if filename is None:
- return None
- elif PyBytes_Check(filename):
- return filename
- elif PyUnicode_Check(filename):
- return filename.encode(FILENAME_ENCODING)
- else:
- raise TypeError('Argument must be string or unicode.')
-
-
-cdef force_str(object s):
- """Return s converted to str type of current Python (bytes in Py2, unicode in Py3)"""
- if s is None:
- return None
- if PY_MAJOR_VERSION < 3:
- return s
- elif PyBytes_Check(s):
- return s.decode('ascii')
- else:
- # assume unicode
- return s
-
-
-cdef bytes force_bytes(object s):
- """convert string or unicode object to bytes, assuming ascii encoding."""
- if PY_MAJOR_VERSION < 3:
- return s
- elif s is None:
- return None
- elif PyBytes_Check(s):
- return s
- elif PyUnicode_Check(s):
- return s.encode('ascii')
- else:
- raise TypeError('Argument must be string, bytes or unicode.')
-
-
-cdef charptr_to_str(const char* s):
- if PY_MAJOR_VERSION < 3:
- return s
- else:
- return s.decode('ascii')
+from pysam.cutils cimport force_bytes, force_str, charptr_to_str
+from pysam.cutils cimport encode_filename, from_string_and_size
########################################################################
@@ -285,7 +231,7 @@ cdef tuple char_array_to_tuple(const char **a, int n, int free_after=0):
if not a:
return None
try:
- return tuple( charptr_to_str(a[i]) for i in range(n) )
+ return tuple(charptr_to_str(a[i]) for i in range(n))
finally:
if free_after and a:
free(a)
@@ -371,8 +317,8 @@ cdef object bcf_info_value(const bcf_info_t *z):
return value
-cdef inline int is_gt_fmt(bcf_hdr_t *h, bcf_fmt_t *fmt):
- return strcmp(bcf_hdr_int2id(h, BCF_DT_ID, fmt.id), "GT") == 0
+cdef inline int is_gt_fmt(bcf_hdr_t *hdr, bcf_fmt_t *fmt):
+ return strcmp(bcf_hdr_int2id(hdr, BCF_DT_ID, fmt.id), "GT") == 0
########################################################################
@@ -412,25 +358,103 @@ cdef class VariantHeaderRecord(object):
return tuple( (r.keys[i] if r.keys[i] else None,
r.vals[i] if r.vals[i] else None) for i in range(r.nkeys) )
+ def __len__(self):
+ cdef bcf_hrec_t *r = self.ptr
+ return r.nkeys
+
+    def __bool__(self):
+        # Truth value follows __len__: a record without keys is falsy.
+        # A yield here would turn __bool__ into a generator function.
+        cdef bcf_hrec_t *r = self.ptr
+        return r.nkeys != 0
+
+ def __getitem__(self, key):
+ """get attribute value"""
+ cdef bcf_hrec_t *r = self.ptr
+ cdef int i
+ for i in range(r.nkeys):
+ if r.keys[i] and r.keys[i] == key:
+ return r.vals[i] if r.vals[i] else None
+ raise KeyError('cannot find metadata key')
+
+ def __iter__(self):
+ cdef bcf_hrec_t *r = self.ptr
+ cdef int i
+ for i in range(r.nkeys):
+ if r.keys[i]:
+ yield r.keys[i]
+
+ def get(self, key, default=None):
+ """D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."""
+ try:
+ return self[key]
+ except KeyError:
+ return default
+
+ def __contains__(self, key):
+ try:
+ self[key]
+ except KeyError:
+ return False
+ else:
+ return True
+
+ def iterkeys(self):
+ """D.iterkeys() -> an iterator over the keys of D"""
+ return iter(self)
+
+ def itervalues(self):
+ """D.itervalues() -> an iterator over the values of D"""
+ cdef bcf_hrec_t *r = self.ptr
+ cdef int i
+ for i in range(r.nkeys):
+ if r.keys[i]:
+ yield r.vals[i] if r.vals[i] else None
+
+ def iteritems(self):
+ """D.iteritems() -> an iterator over the (key, value) items of D"""
+ cdef bcf_hrec_t *r = self.ptr
+ cdef int i
+ for i in range(r.nkeys):
+ if r.keys[i]:
+ yield r.keys[i], r.vals[i] if r.vals[i] else None
+
+ def keys(self):
+ """D.keys() -> list of D's keys"""
+ return list(self)
+
+ def items(self):
+ """D.items() -> list of D's (key, value) pairs, as 2-tuples"""
+ return list(self.iteritems())
+
+ def values(self):
+ """D.values() -> list of D's values"""
+ return list(self.itervalues())
+
+ # Mappings are not hashable by default, but subclasses can change this
+ __hash__ = None
+
+ #TODO: implement __richcmp__
+
def __str__(self):
cdef bcf_hrec_t *r = self.ptr
if r.type == BCF_HL_GEN:
return '##{}={}'.format(self.key, self.value)
else:
attrs = ','.join('{}={}'.format(k, v) for k,v in self.attrs if k != 'IDX')
- return '##{}=<{}>'.format(self.type, attrs)
+ return '##{}=<{}>'.format(self.key or self.type, attrs)
-cdef VariantHeaderRecord makeVariantHeaderRecord(VariantHeader header, bcf_hrec_t *h):
+cdef VariantHeaderRecord makeVariantHeaderRecord(VariantHeader header, bcf_hrec_t *hdr):
if not header:
raise ValueError('invalid VariantHeader')
- if not h:
+ if not hdr:
return None
cdef VariantHeaderRecord record = VariantHeaderRecord.__new__(VariantHeaderRecord)
record.header = header
- record.ptr = h
+ record.ptr = hdr
return record
@@ -472,8 +496,8 @@ cdef class VariantMetadata(object):
property name:
"""metadata name"""
def __get__(self):
- cdef bcf_hdr_t *h = self.header.ptr
- return h.id[BCF_DT_ID][self.id].key
+ cdef bcf_hdr_t *hdr = self.header.ptr
+ return hdr.id[BCF_DT_ID][self.id].key
# Q: Should this be exposed?
property id:
@@ -484,12 +508,12 @@ cdef class VariantMetadata(object):
property number:
"""metadata number (i.e. cardinality)"""
def __get__(self):
- cdef bcf_hdr_t *h = self.header.ptr
- if not bcf_hdr_idinfo_exists(h, self.type, self.id) or self.type == BCF_HL_FLT:
+ cdef bcf_hdr_t *hdr = self.header.ptr
+ if not bcf_hdr_idinfo_exists(hdr, self.type, self.id) or self.type == BCF_HL_FLT:
return None
- cdef int l = bcf_hdr_id2length(h, self.type, self.id)
+ cdef int l = bcf_hdr_id2length(hdr, self.type, self.id)
if l == BCF_VL_FIXED:
- return bcf_hdr_id2number(h, self.type, self.id)
+ return bcf_hdr_id2number(hdr, self.type, self.id)
elif l == BCF_VL_VAR:
return '.'
else:
@@ -498,18 +522,26 @@ cdef class VariantMetadata(object):
property type:
"""metadata value type"""
def __get__(self):
- cdef bcf_hdr_t *h = self.header.ptr
- if not bcf_hdr_idinfo_exists(h, self.type, self.id) or self.type == BCF_HL_FLT:
+ cdef bcf_hdr_t *hdr = self.header.ptr
+ if not bcf_hdr_idinfo_exists(hdr, self.type, self.id) or self.type == BCF_HL_FLT:
return None
- return VALUE_TYPES[bcf_hdr_id2type(h, self.type, self.id)]
+ return VALUE_TYPES[bcf_hdr_id2type(hdr, self.type, self.id)]
- property header:
+ property description:
+ """metadata description (or None if not set)"""
+ def __get__(self):
+ descr = self.record.get('Description')
+ if descr:
+ descr = descr.strip('"')
+ return descr
+
+ property record:
""":class:`VariantHeaderRecord` associated with this :class:`VariantMetadata` object"""
def __get__(self):
- cdef bcf_hdr_t *h = self.header.ptr
- if not bcf_hdr_idinfo_exists(h, self.type, self.id):
+ cdef bcf_hdr_t *hdr = self.header.ptr
+ if not bcf_hdr_idinfo_exists(hdr, self.type, self.id):
return None
- cdef bcf_hrec_t *hrec = h.id[BCF_DT_ID][self.id].val.hrec[self.type]
+ cdef bcf_hrec_t *hrec = hdr.id[BCF_DT_ID][self.id].val.hrec[self.type]
if not hrec:
return None
return makeVariantHeaderRecord(self.header, hrec)
@@ -536,33 +568,56 @@ cdef VariantMetadata makeVariantMetadata(VariantHeader header, int type, int id)
cdef class VariantHeaderMetadata(object):
"""mapping from filter, info or format name to :class:`VariantMetadata` object"""
+ def add(self, id, number, type, description, **kwargs):
+ """Add a new filter, info or format record"""
+ if id in self:
+ raise ValueError('Header already exists for id={}'.format(id))
+
+ if self.type == BCF_HL_FLT:
+ if number is not None:
+ raise ValueError('Number must be None when adding a filter')
+ if type is not None:
+ raise ValueError('Type must be None when adding a filter')
+
+ items = [('ID', id), ('Description', description)]
+ else:
+ if type not in VALUE_TYPES:
+ raise ValueError('unknown type specified: {}'.format(type))
+ if number is None:
+ number = '.'
+
+ items = [('ID', id), ('Number', number), ('Type', type), ('Description', description)]
+
+ items += kwargs.items()
+ self.header.add_meta(METADATA_TYPES[self.type], items=items)
+
def __len__(self):
- cdef bcf_hdr_t *h = self.header.ptr
+ cdef bcf_hdr_t *hdr = self.header.ptr
cdef bcf_idpair_t *idpair
cdef int32_t i, n = 0
- for i in range(h.n[BCF_DT_ID]):
- idpair = h.id[BCF_DT_ID] + i
+ for i in range(hdr.n[BCF_DT_ID]):
+ idpair = hdr.id[BCF_DT_ID] + i
if idpair.key and idpair.val and idpair.val.info[self.type] & 0xF != 0xF:
n += 1
return n
def __bool__(self):
- cdef bcf_hdr_t *h = self.header.ptr
+ cdef bcf_hdr_t *hdr = self.header.ptr
cdef bcf_idpair_t *idpair
cdef int32_t i
- for i in range(h.n[BCF_DT_ID]):
- idpair = h.id[BCF_DT_ID] + i
+ for i in range(hdr.n[BCF_DT_ID]):
+ idpair = hdr.id[BCF_DT_ID] + i
if idpair.key and idpair.val and idpair.val.info[self.type] & 0xF != 0xF:
return True
return False
def __getitem__(self, key):
- cdef bcf_hdr_t *h = self.header.ptr
- cdef vdict_t *d = <vdict_t *>h.dict[BCF_DT_ID]
+ cdef bcf_hdr_t *hdr = self.header.ptr
+ cdef vdict_t *d = <vdict_t *>hdr.dict[BCF_DT_ID]
cdef khiter_t k = kh_get_vdict(d, key)
if k == kh_end(d) or kh_val_vdict(d, k).info[self.type] & 0xF == 0xF:
@@ -571,12 +626,12 @@ cdef class VariantHeaderMetadata(object):
return makeVariantMetadata(self.header, self.type, kh_val_vdict(d, k).id)
def __iter__(self):
- cdef bcf_hdr_t *h = self.header.ptr
+ cdef bcf_hdr_t *hdr = self.header.ptr
cdef bcf_idpair_t *idpair
cdef int32_t i
- for i in range(h.n[BCF_DT_ID]):
- idpair = h.id[BCF_DT_ID] + i
+ for i in range(hdr.n[BCF_DT_ID]):
+ idpair = hdr.id[BCF_DT_ID] + i
if idpair.key and idpair.val and idpair.val.info[self.type] & 0xF != 0xF:
yield idpair.key
@@ -644,8 +699,8 @@ cdef class VariantContig(object):
property name:
"""contig name"""
def __get__(self):
- cdef bcf_hdr_t *h = self.header.ptr
- return h.id[BCF_DT_CTG][self.id].key
+ cdef bcf_hdr_t *hdr = self.header.ptr
+ return hdr.id[BCF_DT_CTG][self.id].key
property id:
"""contig internal id number"""
@@ -655,15 +710,15 @@ cdef class VariantContig(object):
property length:
"""contig length or None if not available"""
def __get__(self):
- cdef bcf_hdr_t *h = self.header.ptr
- cdef uint32_t length = h.id[BCF_DT_CTG][self.id].val.info[0]
+ cdef bcf_hdr_t *hdr = self.header.ptr
+ cdef uint32_t length = hdr.id[BCF_DT_CTG][self.id].val.info[0]
return length if length else None
property header:
""":class:`VariantHeaderRecord` associated with this :class:`VariantContig` object"""
def __get__(self):
- cdef bcf_hdr_t *h = self.header.ptr
- cdef bcf_hrec_t *hrec = h.id[BCF_DT_CTG][self.id].val.hrec[0]
+ cdef bcf_hdr_t *hdr = self.header.ptr
+ cdef bcf_hrec_t *hrec = hdr.id[BCF_DT_CTG][self.id].val.hrec[0]
return makeVariantHeaderRecord(self.header, hrec)
@@ -685,26 +740,26 @@ cdef class VariantHeaderContigs(object):
"""mapping from contig name or index to :class:`VariantContig` object."""
def __len__(self):
- cdef bcf_hdr_t *h = self.header.ptr
- assert kh_size(<vdict_t *>h.dict[BCF_DT_CTG]) == h.n[BCF_DT_CTG]
- return h.n[BCF_DT_CTG]
+ cdef bcf_hdr_t *hdr = self.header.ptr
+ assert kh_size(<vdict_t *>hdr.dict[BCF_DT_CTG]) == hdr.n[BCF_DT_CTG]
+ return hdr.n[BCF_DT_CTG]
def __bool__(self):
- cdef bcf_hdr_t *h = self.header.ptr
- assert kh_size(<vdict_t *>h.dict[BCF_DT_CTG]) == h.n[BCF_DT_CTG]
- return h.n[BCF_DT_CTG] != 0
+ cdef bcf_hdr_t *hdr = self.header.ptr
+ assert kh_size(<vdict_t *>hdr.dict[BCF_DT_CTG]) == hdr.n[BCF_DT_CTG]
+ return hdr.n[BCF_DT_CTG] != 0
def __getitem__(self, key):
- cdef bcf_hdr_t *h = self.header.ptr
+ cdef bcf_hdr_t *hdr = self.header.ptr
cdef int index
if isinstance(key, int):
index = key
- if index < 0 or index >= h.n[BCF_DT_CTG]:
+ if index < 0 or index >= hdr.n[BCF_DT_CTG]:
raise IndexError('invalid contig index')
return makeVariantContig(self.header, index)
- cdef vdict_t *d = <vdict_t *>h.dict[BCF_DT_CTG]
+ cdef vdict_t *d = <vdict_t *>hdr.dict[BCF_DT_CTG]
cdef khiter_t k = kh_get_vdict(d, key)
if k == kh_end(d):
@@ -715,14 +770,14 @@ cdef class VariantHeaderContigs(object):
return makeVariantContig(self.header, id)
def __iter__(self):
- cdef bcf_hdr_t *h = self.header.ptr
- cdef vdict_t *d = <vdict_t *>h.dict[BCF_DT_CTG]
+ cdef bcf_hdr_t *hdr = self.header.ptr
+ cdef vdict_t *d = <vdict_t *>hdr.dict[BCF_DT_CTG]
cdef uint32_t n = kh_size(d)
- assert n == h.n[BCF_DT_CTG]
+ assert n == hdr.n[BCF_DT_CTG]
for i in range(n):
- yield bcf_hdr_id2name(h, i)
+ yield bcf_hdr_id2name(hdr, i)
def get(self, key, default=None):
"""D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."""
@@ -770,6 +825,14 @@ cdef class VariantHeaderContigs(object):
#TODO: implement __richcmp__
+    def add(self, id, **kwargs):
+        """Add a new contig record
+
+        *id* is stored as the ``ID`` attribute of the new ``contig``
+        metadata entry; any keyword arguments are passed along as
+        additional (key, value) attributes.
+
+        Raises ValueError if *id* is already present in this mapping.
+        """
+        if id in self:
+            raise ValueError('Header already exists for contig {}'.format(id))
+
+        # kwargs.items() is a view on Python 3 and cannot be concatenated
+        # to a list with +; in-place extension works on Python 2 and 3.
+        items = [('ID', id)]
+        items += kwargs.items()
+        self.header.add_meta('contig', items=items)
+
cdef VariantHeaderContigs makeVariantHeaderContigs(VariantHeader header):
if not header:
@@ -791,26 +854,25 @@ cdef class VariantHeaderSamples(object):
return bcf_hdr_nsamples(self.header.ptr) != 0
def __getitem__(self, index):
- cdef bcf_hdr_t *h = self.header.ptr
- cdef int32_t n = bcf_hdr_nsamples(h)
+ cdef bcf_hdr_t *hdr = self.header.ptr
+ cdef int32_t n = bcf_hdr_nsamples(hdr)
cdef int32_t i = index
if i < 0 or i >= n:
raise IndexError('invalid sample index')
- return h.samples[i]
+ return hdr.samples[i]
def __iter__(self):
- cdef bcf_hdr_t *h = self.header.ptr
- cdef int32_t n = bcf_hdr_nsamples(h)
- cdef int32_t i
+ cdef bcf_hdr_t *hdr = self.header.ptr
+ cdef int32_t i, n = bcf_hdr_nsamples(hdr)
for i in range(n):
- yield h.samples[i]
+ yield hdr.samples[i]
def __contains__(self, key):
- cdef bcf_hdr_t *h = self.header.ptr
- cdef vdict_t *d = <vdict_t *>h.dict[BCF_DT_SAMPLE]
+ cdef bcf_hdr_t *hdr = self.header.ptr
+ cdef vdict_t *d = <vdict_t *>hdr.dict[BCF_DT_SAMPLE]
cdef khiter_t k = kh_get_vdict(d, key)
return k != kh_end(d)
@@ -820,6 +882,10 @@ cdef class VariantHeaderSamples(object):
#TODO: implement __richcmp__
+ def add(self, name):
+ """Add a new sample"""
+ self.header.add_sample(name)
+
cdef VariantHeaderSamples makeVariantHeaderSamples(VariantHeader header):
if not header:
@@ -839,17 +905,12 @@ cdef class VariantHeader(object):
#FIXME: Add mutable methods
# See makeVariantHeader for C constructor
- def __cinit__(self, mode):
+ def __cinit__(self):
self.ptr = NULL
# Python constructor
- def __init__(self, mode):
- if mode not in 'rw':
- raise ValueError("invalid header mode specified '{}'".format(mode))
-
- mode = force_bytes(mode)
- self.ptr = bcf_hdr_init(mode)
-
+ def __init__(self):
+ self.ptr = bcf_hdr_init(b'w')
if not self.ptr:
raise ValueError('cannot create VariantHeader')
@@ -900,6 +961,18 @@ cdef class VariantHeader(object):
def __get__(self):
return makeVariantHeaderMetadata(self, BCF_HL_FMT)
+ property alts:
+ """
+ alt metadata (:class:`dict` ID->record). The data returned is just a snapshot of alt records:
+ it is created every time the property is requested, and modifications will not be reflected
+ in the header metadata and vice versa.
+
+ i.e. it is just a dict that reflects the state of alt records at the time it is created.
+ """
+ def __get__(self):
+ return { record['ID']:record for record in self.records if record.key.upper() == 'ALT' }
+
+
# only safe to do when opening an htsfile
cdef _subset_samples(self, include_samples):
keep_samples = set(self.samples)
@@ -926,13 +999,67 @@ cdef class VariantHeader(object):
free(hstr)
return force_str(hstr)
+ def add_record(self, VariantHeaderRecord record):
+ """Add an existing :class:`VariantHeaderRecord` to this header"""
+ cdef bcf_hrec_t *r = record.ptr
+
+ if r.type == BCF_HL_GEN:
+ self.add_meta(r.key, r.value)
+ else:
+ items = [(k,v) for k,v in record.attrs if k != 'IDX']
+ self.add_meta(r.key, items=items)
+
+ def add_line(self, line):
+ """Add a metadata line to this header"""
+ if bcf_hdr_append(self.ptr, line) < 0:
+ raise ValueError('invalid header line')
-cdef VariantHeader makeVariantHeader(bcf_hdr_t *h):
- if not h:
+ if self.ptr.dirty:
+ bcf_hdr_sync(self.ptr)
+
+    def add_meta(self, key, value=None, items=None):
+        """Add metadata to this header
+
+        Exactly one of *value* or *items* must be supplied.  *value* is
+        stored as the value of the new record; *items* is an iterable of
+        (key, value) pairs that are added as the record's attributes, with
+        each value converted via ``str``.
+
+        Raises ValueError if both or neither of *value* and *items* are
+        given, and MemoryError if the record cannot be allocated.
+        """
+        if not ((value is not None) ^ (items is not None)):
+            raise ValueError('either value or items must be specified')
+
+        cdef bcf_hrec_t *hrec = <bcf_hrec_t*>calloc(1, sizeof(bcf_hrec_t))
+        cdef int quoted
+
+        # calloc can fail; writing through a NULL record would crash.
+        if not hrec:
+            raise MemoryError('cannot allocate header record')
+
+        try:
+            hrec.key = strdup(key)
+
+            if value is not None:
+                hrec.value = strdup(value)
+            else:
+                # separate loop names so the key/value arguments are not rebound
+                for k, v in items:
+                    bcf_hrec_add_key(hrec, k, len(k))
+
+                    v = str(v)
+                    # flag values containing separator or quote characters;
+                    # the flag is handed to bcf_hrec_set_val (presumably so
+                    # the value is quoted on output -- confirm against htslib)
+                    quoted = strpbrk(v, ' ;,"\t<>') != NULL
+                    bcf_hrec_set_val(hrec, hrec.nkeys-1, v, len(v), quoted)
+        except:
+            # release the partially built record before re-raising
+            bcf_hrec_destroy(hrec)
+            raise
+
+        bcf_hdr_add_hrec(self.ptr, hrec)
+
+        # resync the header when the addition left it flagged as dirty
+        if self.ptr.dirty:
+            bcf_hdr_sync(self.ptr)
+
+ def add_sample(self, name):
+ """Add a new sample to this header"""
+ if bcf_hdr_add_sample(self.ptr, name) < 0:
+ raise ValueError('Duplicated sample name: {}'.format(name))
+ if self.ptr.dirty:
+ bcf_hdr_sync(self.ptr)
+
+
+cdef VariantHeader makeVariantHeader(bcf_hdr_t *hdr):
+ if not hdr:
raise ValueError('cannot create VariantHeader')
- cdef VariantHeader header = VariantHeader.__new__(VariantHeader, None)
- header.ptr = h
+ cdef VariantHeader header = VariantHeader.__new__(VariantHeader)
+ header.ptr = hdr
return header
@@ -952,7 +1079,7 @@ cdef class VariantRecordFilter(object):
return self.record.ptr.d.n_flt != 0
def __getitem__(self, key):
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
cdef int index, id
cdef int n = r.d.n_flt
@@ -968,20 +1095,20 @@ cdef class VariantRecordFilter(object):
if key == '.':
key = 'PASS'
- id = bcf_hdr_id2int(h, BCF_DT_ID, key)
+ id = bcf_hdr_id2int(hdr, BCF_DT_ID, key)
- if not bcf_hdr_idinfo_exists(h, BCF_HL_FLT, id) or not bcf_has_filter(h, self.record.ptr, key):
+ if not bcf_hdr_idinfo_exists(hdr, BCF_HL_FLT, id) or not bcf_has_filter(hdr, self.record.ptr, key):
raise KeyError('Invalid filter')
return makeVariantMetadata(self.record.header, BCF_HL_FLT, id)
def __iter__(self):
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
cdef int i, n = r.d.n_flt
for i in range(n):
- yield bcf_hdr_int2id(h, BCF_DT_ID, r.d.flt[i])
+ yield bcf_hdr_int2id(hdr, BCF_DT_ID, r.d.flt[i])
def get(self, key, default=None):
"""D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."""
@@ -991,9 +1118,9 @@ cdef class VariantRecordFilter(object):
return default
def __contains__(self, key):
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
- return bcf_has_filter(h, r, key) == 1
+ return bcf_has_filter(hdr, r, key) == 1
def iterkeys(self):
"""D.iterkeys() -> an iterator over the keys of D"""
@@ -1047,7 +1174,7 @@ cdef class VariantRecordFormat(object):
return self.record.ptr.n_fmt != 0
def __getitem__(self, key):
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
cdef bcf_fmt_t *fmt
cdef int index
@@ -1059,19 +1186,19 @@ cdef class VariantRecordFormat(object):
raise IndexError('invalid format index')
fmt = &r.d.fmt[index]
else:
- fmt = bcf_get_fmt(h, r, key)
+ fmt = bcf_get_fmt(hdr, r, key)
if not fmt:
raise KeyError('unknown format')
return makeVariantMetadata(self.record.header, BCF_HL_FMT, fmt.id)
def __iter__(self):
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
cdef int i, n = r.n_fmt
for i in range(n):
- yield bcf_hdr_int2id(h, BCF_DT_ID, r.d.fmt[i].id)
+ yield bcf_hdr_int2id(hdr, BCF_DT_ID, r.d.fmt[i].id)
def get(self, key, default=None):
"""D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."""
@@ -1081,9 +1208,9 @@ cdef class VariantRecordFormat(object):
return default
def __contains__(self, key):
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
- cdef bcf_fmt_t *fmt = bcf_get_fmt(h, r, key)
+ cdef bcf_fmt_t *fmt = bcf_get_fmt(hdr, r, key)
return fmt != NULL
def iterkeys(self):
@@ -1139,9 +1266,9 @@ cdef class VariantRecordInfo(object):
return self.record.ptr.n_info != 0
def __getitem__(self, key):
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
- cdef bcf_info_t *info = bcf_get_info(h, r, key)
+ cdef bcf_info_t *info = bcf_get_info(hdr, r, key)
if not info:
raise KeyError('Unknown INFO field: {}'.format(key))
@@ -1149,12 +1276,12 @@ cdef class VariantRecordInfo(object):
return bcf_info_value(info)
def __iter__(self):
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
cdef int i, n = r.n_info
for i in range(n):
- yield bcf_hdr_int2id(h, BCF_DT_ID, r.d.info[i].key)
+ yield bcf_hdr_int2id(hdr, BCF_DT_ID, r.d.info[i].key)
def get(self, key, default=None):
"""D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."""
@@ -1164,9 +1291,9 @@ cdef class VariantRecordInfo(object):
return default
def __contains__(self, key):
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
- cdef bcf_info_t *info = bcf_get_info(h, r, key)
+ cdef bcf_info_t *info = bcf_get_info(hdr, r, key)
return info != NULL
@@ -1186,14 +1313,14 @@ cdef class VariantRecordInfo(object):
def iteritems(self):
"""D.iteritems() -> an iterator over the (key, value) items of D"""
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
cdef bcf_info_t *info
cdef int i, n = r.n_info
for i in range(n):
info = &r.d.info[i]
- key = bcf_hdr_int2id(h, BCF_DT_ID, info.key)
+ key = bcf_hdr_int2id(hdr, BCF_DT_ID, info.key)
value = bcf_info_value(info)
yield key, value
@@ -1226,7 +1353,7 @@ cdef VariantRecordInfo makeVariantRecordInfo(VariantRecord record):
cdef class VariantRecordSamples(object):
- """mapping from sample index or name to :class:`makeVariantRecordSample` object."""
+ """mapping from sample index or name to :class:`VariantRecordSample` object."""
def __len__(self):
return bcf_hdr_nsamples(self.record.header.ptr)
@@ -1235,9 +1362,9 @@ cdef class VariantRecordSamples(object):
return bcf_hdr_nsamples(self.record.header.ptr) != 0
def __getitem__(self, key):
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
- cdef int n = bcf_hdr_nsamples(h)
+ cdef int n = bcf_hdr_nsamples(hdr)
cdef int sample_index
cdef vdict_t *d
cdef khiter_t k
@@ -1245,7 +1372,7 @@ cdef class VariantRecordSamples(object):
if isinstance(key, int):
sample_index = key
else:
- sample_index = bcf_hdr_id2int(h, BCF_DT_SAMPLE, key)
+ sample_index = bcf_hdr_id2int(hdr, BCF_DT_SAMPLE, key)
if sample_index < 0:
raise KeyError('invalid sample name')
@@ -1255,12 +1382,12 @@ cdef class VariantRecordSamples(object):
return makeVariantRecordSample(self.record, sample_index)
def __iter__(self):
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
- cdef int32_t i, n = bcf_hdr_nsamples(h)
+ cdef int32_t i, n = bcf_hdr_nsamples(hdr)
for i in range(n):
- yield h.samples[i]
+ yield hdr.samples[i]
def get(self, key, default=None):
"""D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."""
@@ -1270,9 +1397,9 @@ cdef class VariantRecordSamples(object):
return default
def __contains__(self, key):
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
- cdef int n = bcf_hdr_nsamples(h)
+ cdef int n = bcf_hdr_nsamples(hdr)
cdef int sample_index
cdef vdict_t *d
cdef khiter_t k
@@ -1280,7 +1407,7 @@ cdef class VariantRecordSamples(object):
if isinstance(key, int):
sample_index = key
else:
- sample_index = bcf_hdr_id2int(h, BCF_DT_SAMPLE, key)
+ sample_index = bcf_hdr_id2int(hdr, BCF_DT_SAMPLE, key)
if sample_index < 0:
raise KeyError('invalid sample name')
@@ -1292,21 +1419,21 @@ cdef class VariantRecordSamples(object):
def itervalues(self):
"""D.itervalues() -> an iterator over the values of D"""
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
- cdef int32_t i, n = bcf_hdr_nsamples(h)
+ cdef int32_t i, n = bcf_hdr_nsamples(hdr)
for i in range(n):
yield makeVariantRecordSample(self.record, i)
def iteritems(self):
"""D.iteritems() -> an iterator over the (key, value) items of D"""
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
- cdef int32_t i, n = bcf_hdr_nsamples(h)
+ cdef int32_t i, n = bcf_hdr_nsamples(hdr)
for i in range(n):
- yield h.samples[i], makeVariantRecordSample(self.record, i)
+ yield hdr.samples[i], makeVariantRecordSample(self.record, i)
def keys(self):
"""D.keys() -> list of D's keys"""
@@ -1330,10 +1457,10 @@ cdef VariantRecordSamples makeVariantRecordSamples(VariantRecord record):
if not record:
raise ValueError('invalid VariantRecord')
- cdef VariantRecordSamples genos = VariantRecordSamples.__new__(VariantRecordSamples)
- genos.record = record
+ cdef VariantRecordSamples samples = VariantRecordSamples.__new__(VariantRecordSamples)
+ samples.record = record
- return genos
+ return samples
cdef class VariantRecord(object):
@@ -1348,41 +1475,82 @@ cdef class VariantRecord(object):
"""internal reference id number"""
def __get__(self):
return self.ptr.rid
+ def __set__(self, rid):
+ cdef bcf_hdr_t *hdr = self.header.ptr
+ cdef int r = rid
+ if rid < 0 or r >= hdr.n[BCF_DT_CTG] or not hdr.id[BCF_DT_CTG][r].val:
+ raise ValueError('invalid reference id')
+ self.ptr.rid = r
property chrom:
"""chromosome/contig name"""
def __get__(self):
return bcf_hdr_id2name(self.header.ptr, self.ptr.rid)
+ def __set__(self, chrom):
+ cdef vdict_t *d = <vdict_t*>self.header.ptr.dict[BCF_DT_CTG]
+ cdef khint_t k = kh_get_vdict(d, chrom)
+ if k == kh_end(d):
+ raise ValueError('Invalid chromosome/contig')
+ self.ptr.rid = kh_val_vdict(d, k).id
property contig:
"""chromosome/contig name"""
def __get__(self):
return bcf_hdr_id2name(self.header.ptr, self.ptr.rid)
+ def __set__(self, chrom):
+ cdef vdict_t *d = <vdict_t*>self.header.ptr.dict[BCF_DT_CTG]
+ cdef khint_t k = kh_get_vdict(d, chrom)
+ if k == kh_end(d):
+ raise ValueError('Invalid chromosome/contig')
+ self.ptr.rid = kh_val_vdict(d, k).id
property pos:
"""record start position on chrom/contig (1-based inclusive)"""
def __get__(self):
return self.ptr.pos + 1
+ def __set__(self, pos):
+ if pos < 1:
+ raise ValueError('Position must be positive')
+ # FIXME: check start <= stop?
+ self.ptr.pos = pos - 1
property start:
"""record start position on chrom/contig (0-based inclusive)"""
def __get__(self):
return self.ptr.pos
+ def __set__(self, start):
+ if start < 0:
+ raise ValueError('Start coordinate must be non-negative')
+ # FIXME: check start <= stop?
+ self.ptr.pos = start
property stop:
"""record stop position on chrom/contig (0-based exclusive)"""
def __get__(self):
return self.ptr.pos + self.ptr.rlen
+ def __set__(self, stop):
+ if stop < self.ptr.pos:
+ raise ValueError('Stop coordinate must be greater than or equal to start')
+ self.ptr.rlen = stop - self.ptr.pos
property rlen:
- """record length on chrom/contig (rec.stop - rec.start)"""
+ """record length on chrom/contig (typically rec.stop - rec.start unless END info is supplied)"""
def __get__(self):
return self.ptr.rlen
+ def __set__(self, rlen):
+ if rlen < 0:
+ raise ValueError('Reference length must be non-negative')
+ self.ptr.rlen = rlen
property qual:
"""phred scaled quality score or None if not available"""
def __get__(self):
return self.ptr.qual if not bcf_float_is_missing(self.ptr.qual) else None
+ def __set__(self, qual):
+ if qual is not None:
+ self.ptr.qual = qual
+ else:
+ memcpy(&self.ptr.qual, &bcf_float_missing, 4)
# property n_info:
# def __get__(self):
@@ -1423,6 +1591,12 @@ cdef class VariantRecord(object):
raise ValueError('Error unpacking VariantRecord')
id = self.ptr.d.id
return id if id != b'.' else None
+ def __set__(self, id):
+ cdef char *idstr = NULL
+ if id is not None:
+ idstr = id
+ if bcf_update_id(self.header.ptr, self.ptr, idstr) < 0:
+ raise ValueError('Error updating id')
property ref:
"""reference allele"""
@@ -1430,6 +1604,10 @@ cdef class VariantRecord(object):
if bcf_unpack(self.ptr, BCF_UN_STR) < 0:
raise ValueError('Error unpacking VariantRecord')
return self.ptr.d.allele[0] if self.ptr.d.allele else None
+ def __set__(self, ref):
+ alleles = list(self.alleles)
+ alleles[0] = ref
+ self.alleles = alleles
property alleles:
"""tuple of reference allele followed by alt alleles"""
@@ -1439,6 +1617,12 @@ cdef class VariantRecord(object):
if not self.ptr.d.allele:
return None
return tuple(self.ptr.d.allele[i] for i in range(self.ptr.n_allele))
+ def __set__(self, values):
+ if bcf_unpack(self.ptr, BCF_UN_STR) < 0:
+ raise ValueError('Error unpacking VariantRecord')
+ values = ','.join(values)
+ if bcf_update_alleles_str(self.header.ptr, self.ptr, values) < 0:
+ raise ValueError('Error updating alleles')
property alts:
"""tuple of alt alleles"""
@@ -1448,6 +1632,10 @@ cdef class VariantRecord(object):
if self.ptr.n_allele < 2 or not self.ptr.d.allele:
return None
return tuple(self.ptr.d.allele[i] for i in range(1,self.ptr.n_allele))
+ def __set__(self, alts):
+ alleles = [self.ref]
+ alleles.extend(alts)
+ self.alleles = alleles
property filter:
"""filter information (see :class:`VariantRecordFilter`)"""
@@ -1533,27 +1721,27 @@ cdef class VariantRecordSample(object):
property name:
"""sample name"""
def __get__(self):
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
- cdef int32_t n = bcf_hdr_nsamples(h)
+ cdef int32_t n = bcf_hdr_nsamples(hdr)
if self.index < 0 or self.index >= n:
raise ValueError('invalid sample index')
- return h.samples[self.index]
+ return hdr.samples[self.index]
property allele_indices:
"""allele indices for called genotype, if present. Otherwise None"""
def __get__(self):
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
- cdef int32_t n = bcf_hdr_nsamples(h)
+ cdef int32_t n = bcf_hdr_nsamples(hdr)
if self.index < 0 or self.index >= n or not r.n_fmt:
return None
cdef bcf_fmt_t *fmt0 = r.d.fmt
- cdef int gt0 = is_gt_fmt(h, fmt0)
+ cdef int gt0 = is_gt_fmt(hdr, fmt0)
if not gt0 or not fmt0.n:
return None
@@ -1587,16 +1775,16 @@ cdef class VariantRecordSample(object):
property alleles:
"""alleles for called genotype, if present. Otherwise None"""
def __get__(self):
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
- cdef int32_t nsamples = bcf_hdr_nsamples(h)
+ cdef int32_t nsamples = bcf_hdr_nsamples(hdr)
cdef int32_t nalleles = r.n_allele
if self.index < 0 or self.index >= nsamples or not r.n_fmt:
return None
cdef bcf_fmt_t *fmt0 = r.d.fmt
- cdef int gt0 = is_gt_fmt(h, fmt0)
+ cdef int gt0 = is_gt_fmt(hdr, fmt0)
if not gt0 or not fmt0.n:
return None
@@ -1631,6 +1819,55 @@ cdef class VariantRecordSample(object):
return tuple(alleles)
+ property phased:
+ """False if genotype is missing or any allele is unphased. Otherwise True."""
+ def __get__(self):
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
+ cdef bcf1_t *r = self.record.ptr
+ cdef int32_t n = bcf_hdr_nsamples(hdr)
+
+ if self.index < 0 or self.index >= n or not r.n_fmt:
+ return False
+
+ cdef bcf_fmt_t *fmt0 = r.d.fmt
+ cdef int gt0 = is_gt_fmt(hdr, fmt0)
+
+ if not gt0 or not fmt0.n:
+ return False
+
+ cdef int8_t *data8
+ cdef int16_t *data16
+ cdef int32_t *data32
+
+ phased = False
+
+ if fmt0.type == BCF_BT_INT8:
+ data8 = <int8_t *>(fmt0.p + self.index * fmt0.size)
+ for i in range(fmt0.n):
+ if data8[i] == bcf_int8_vector_end:
+ break
+ if i and data8[i] & 1 == 0:
+ return False
+ phased = True
+ elif fmt0.type == BCF_BT_INT16:
+ data16 = <int16_t *>(fmt0.p + self.index * fmt0.size)
+ for i in range(fmt0.n):
+ if data16[i] == bcf_int16_vector_end:
+ break
+ if i and data16[i] & 1 == 0:
+ return False
+ phased = True
+ elif fmt0.type == BCF_BT_INT32:
+ data32 = <int32_t *>(fmt0.p + self.index * fmt0.size)
+ for i in range(fmt0.n):
+ if data32[i] == bcf_int32_vector_end:
+ break
+ if i and data32[i] & 1 == 0:
+ return False
+ phased = True
+
+ return phased
+
def __len__(self):
return self.record.ptr.n_fmt
@@ -1638,7 +1875,7 @@ cdef class VariantRecordSample(object):
return self.record.ptr.n_fmt != 0
def __getitem__(self, key):
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
cdef bcf_fmt_t *fmt
cdef int index
@@ -1649,12 +1886,12 @@ cdef class VariantRecordSample(object):
raise IndexError('invalid format index')
fmt = r.d.fmt + index
else:
- fmt = bcf_get_fmt(h, r, key)
+ fmt = bcf_get_fmt(hdr, r, key)
if not fmt:
raise KeyError('invalid format requested')
- if is_gt_fmt(h, fmt):
+ if is_gt_fmt(hdr, fmt):
return self.alleles
elif fmt.p and fmt.n and fmt.size:
return bcf_array_to_object(fmt.p + self.index * fmt.size, fmt.type, fmt.n, scalar=1)
@@ -1662,12 +1899,12 @@ cdef class VariantRecordSample(object):
return None
def __iter__(self):
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
cdef int i, n = r.n_fmt
for i in range(n):
- yield bcf_hdr_int2id(h, BCF_DT_ID, r.d.fmt[i].id)
+ yield bcf_hdr_int2id(hdr, BCF_DT_ID, r.d.fmt[i].id)
def get(self, key, default=None):
"""D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."""
@@ -1677,9 +1914,9 @@ cdef class VariantRecordSample(object):
return default
def __contains__(self, key):
- cdef bcf_hdr_t *h = self.record.header.ptr
+ cdef bcf_hdr_t *hdr = self.record.header.ptr
cdef bcf1_t *r = self.record.ptr
- cdef bcf_fmt_t *fmt = bcf_get_fmt(h, r, key)
+ cdef bcf_fmt_t *fmt = bcf_get_fmt(hdr, r, key)
return fmt != NULL
def iterkeys(self):
@@ -1905,6 +2142,8 @@ cdef class BCFIterator(BaseIterator):
raise ValueError('bcf index required')
cdef BCFIndex index = bcf.index
+ cdef int rid, cstart, cstop
+ cdef char *cregion
if not index:
raise ValueError('bcf index required')
@@ -1916,7 +2155,9 @@ cdef class BCFIterator(BaseIterator):
if contig is not None or start is not None or stop is not None:
raise ValueError # FIXME
- self.iter = bcf_itr_querys(index.ptr, bcf.header.ptr, region)
+ cregion = region
+ with nogil:
+ self.iter = bcf_itr_querys(index.ptr, bcf.header.ptr, cregion)
else:
if contig is None:
raise ValueError # FIXME
@@ -1928,7 +2169,10 @@ cdef class BCFIterator(BaseIterator):
if stop is None:
stop = MAX_POS
- self.iter = bcf_itr_queryi(index.ptr, rid, start, stop)
+ cstart, cstop = start, stop
+
+ with nogil:
+ self.iter = bcf_itr_queryi(index.ptr, rid, cstart, cstop)
# Do not fail on self.iter == NULL, since it signifies a null query.
@@ -1953,7 +2197,10 @@ cdef class BCFIterator(BaseIterator):
if self.bcf.drop_samples:
record.max_unpack = BCF_UN_SHR
- cdef int ret = bcf_itr_next(self.bcf.htsfile, self.iter, record)
+ cdef int ret
+
+ with nogil:
+ ret = bcf_itr_next(self.bcf.htsfile, self.iter, record)
if ret < 0:
_stop_BCFIterator(self, record)
@@ -2031,7 +2278,10 @@ cdef class TabixIterator(BaseIterator):
if not self.iter:
raise StopIteration
- cdef int ret = tbx_itr_next(self.bcf.htsfile, self.index.ptr, self.iter, &self.line_buffer)
+ cdef int ret
+
+ with nogil:
+ ret = tbx_itr_next(self.bcf.htsfile, self.index.ptr, self.iter, &self.line_buffer)
if ret < 0:
tbx_itr_destroy(self.iter)
@@ -2191,7 +2441,8 @@ cdef class VariantFile(object):
if self.drop_samples:
record.max_unpack = BCF_UN_SHR
- ret = bcf_read1(self.htsfile, self.header.ptr, record)
+ with nogil:
+ ret = bcf_read1(self.htsfile, self.header.ptr, record)
if ret < 0:
bcf_destroy1(record)
@@ -2209,9 +2460,13 @@ cdef class VariantFile(object):
raise ValueError
cdef VariantFile vars = VariantFile.__new__(VariantFile)
+ cdef bcf_hdr_t *hdr
+ cdef char *cfilename, *cmode
# FIXME: re-open using fd or else header and index could be invalid
- vars.htsfile = hts_open(self.filename, self.mode)
+ cfilename, cmode = self.filename, self.mode
+ with nogil:
+ vars.htsfile = hts_open(cfilename, cmode)
if not vars.htsfile:
raise ValueError('Cannot re-open htsfile')
@@ -2232,7 +2487,9 @@ cdef class VariantFile(object):
if self.htsfile.is_bin:
vars.seek(self.tell())
else:
- makeVariantHeader(bcf_hdr_read(vars.htsfile))
+ with nogil:
+ hdr = bcf_hdr_read(vars.htsfile)
+ makeVariantHeader(hdr)
return vars
@@ -2242,6 +2499,11 @@ cdef class VariantFile(object):
If open is called on an existing VariantFile, the current file will be
closed and a new file will be opened.
"""
+ cdef bcf_hdr_t *hdr
+ cdef hts_idx_t *idx
+ cdef tbx_t *tidx
+ cdef char *cfilename, *cmode
+
# close a previously opened file
if self.is_open:
self.close()
@@ -2285,33 +2547,47 @@ cdef class VariantFile(object):
# open file. Header gets written to file at the same time for bam files
# and sam files (in the latter case, the mode needs to be wh)
- self.htsfile = hts_open(filename, mode)
+ cfilename, cmode = filename, mode
+ with nogil:
+ self.htsfile = hts_open(cfilename, cmode)
if not self.htsfile:
raise ValueError("could not open file `{}` (mode='{}')".format((filename, mode)))
- bcf_hdr_write(self.htsfile, self.header.ptr)
+ with nogil:
+ bcf_hdr_write(self.htsfile, self.header.ptr)
elif mode[0] == b'r':
# open file for reading
if filename != b'-' and not self.is_remote and not os.path.exists(filename):
raise IOError('file `{}` not found'.format(filename))
- self.htsfile = hts_open(filename, mode)
+ cfilename, cmode = filename, mode
+ with nogil:
+ self.htsfile = hts_open(cfilename, cmode)
if not self.htsfile:
raise ValueError("could not open file `{}` (mode='{}') - is it VCF/BCF format?".format((filename, mode)))
- self.header = makeVariantHeader(bcf_hdr_read(self.htsfile))
+ with nogil:
+ hdr = bcf_hdr_read(self.htsfile)
+ self.header = makeVariantHeader(hdr)
if not self.header:
raise ValueError("file `{}` does not have valid header (mode='{}') - is it BCF format?".format((filename, mode)))
# check for index and open if present
if self.htsfile.format.format == bcf:
- self.index = makeBCFIndex(self.header, bcf_index_load(filename))
+ cfilename = filename
+ with nogil:
+ idx = bcf_index_load(cfilename)
+ self.index = makeBCFIndex(self.header, idx)
else:
- self.index = makeTabixIndex(tbx_index_load(filename + '.tbi'))
+ tabix_filename = filename + '.tbi'
+ cfilename = tabix_filename
+ with nogil:
+ tidx = tbx_index_load(cfilename)
+ self.index = makeTabixIndex(tidx)
if not self.is_stream:
self.start_offset = self.tell()
@@ -2327,10 +2603,15 @@ cdef class VariantFile(object):
if self.is_stream:
raise OSError('seek not available in streams')
+ cdef int ret
if self.htsfile.format.compression != no_compression:
- return bgzf_seek(hts_get_bgzfp(self.htsfile), offset, SEEK_SET)
+ with nogil:
+ ret = bgzf_seek(hts_get_bgzfp(self.htsfile), offset, SEEK_SET)
else:
- return hts_useek(self.htsfile, offset, SEEK_SET)
+ with nogil:
+ ret = hts_useek(self.htsfile, offset, SEEK_SET)
+ return ret
+
def tell(self):
"""return current file position, see :meth:`pysam.VariantFile.seek`."""
@@ -2339,10 +2620,14 @@ cdef class VariantFile(object):
if self.is_stream:
raise OSError('tell not available in streams')
+ cdef int ret
if self.htsfile.format.compression != no_compression:
- return bgzf_tell(hts_get_bgzfp(self.htsfile))
+ with nogil:
+ ret = bgzf_tell(hts_get_bgzfp(self.htsfile))
else:
- return hts_utell(self.htsfile)
+ with nogil:
+ ret = hts_utell(self.htsfile)
+ return ret
def fetch(self, contig=None, start=None, stop=None, region=None, reopen=False):
"""fetch records in a :term:`region` using 0-based indexing. The
@@ -2391,7 +2676,10 @@ cdef class VariantFile(object):
if not self.is_open:
return 0
- cdef int ret = bcf_write1(self.htsfile, self.header.ptr, record.ptr)
+ cdef int ret
+
+ with nogil:
+ ret = bcf_write1(self.htsfile, self.header.ptr, record.ptr)
if ret < 0:
raise ValueError('write failed')
diff --git a/pysam/cfaidx.pxd b/pysam/cfaidx.pxd
index b7926df..34e825e 100644
--- a/pysam/cfaidx.pxd
+++ b/pysam/cfaidx.pxd
@@ -3,13 +3,39 @@ from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t
from libc.stdlib cimport malloc, calloc, realloc, free
from libc.string cimport memcpy, memcmp, strncpy, strlen, strdup
from libc.stdio cimport FILE, printf
+cimport cython
-from chtslib cimport faidx_t, kseq_t, gzFile
-
-cdef extern from "htslib/kstring.h" nogil:
- ctypedef struct kstring_t:
- size_t l, m
- char *s
+from cpython cimport array
+from pysam.chtslib cimport faidx_t, gzFile, kstring_t
+
+# These functions are put here and not in chtslib.pxd in order
+# to avoid warnings for unused functions.
+cdef extern from "pysam_stream.h" nogil:
+
+ ctypedef struct kstream_t:
+ pass
+
+ ctypedef struct kseq_t:
+ kstring_t name
+ kstring_t comment
+ kstring_t seq
+ kstring_t qual
+
+ gzFile gzopen(char *, char *)
+ kseq_t *kseq_init(gzFile)
+ int kseq_read(kseq_t *)
+ void kseq_destroy(kseq_t *)
+ int gzclose(gzFile)
+
+ kstream_t *ks_init(gzFile)
+ void ks_destroy(kstream_t *)
+
+ # Retrieve characters from stream until delimiter
+ # is reached placing results in str.
+ int ks_getuntil(kstream_t *,
+ int delimiter,
+ kstring_t * str,
+ int * dret)
cdef class FastaFile:
cdef object _filename, _references, _lengths, reference2length
@@ -20,23 +46,35 @@ cdef class FastaFile:
cdef class FastqProxy:
cdef kseq_t * _delegate
+ cdef cython.str tostring(self)
+ cpdef array.array get_quality_array(self, int offset=*)
+
+
+cdef class PersistentFastqProxy:
+ """
+ Python container for pysam.cfaidx.FastqProxy with persistence.
+ """
+ cdef public str comment, quality, sequence, name
+ cdef cython.str tostring(self)
+ cpdef array.array get_quality_array(self, int offset=*)
cdef class FastxFile:
cdef object _filename
cdef gzFile fastqfile
cdef kseq_t * entry
+ cdef bint persist
cdef kseq_t * getCurrent(self)
cdef int cnext(self)
+
# Compatibility Layer for pysam 0.8.1
cdef class FastqFile(FastxFile):
pass
+
# Compatibility Layer for pysam < 0.8
cdef class Fastafile(FastaFile):
pass
-cdef class Fastqfile(FastxFile):
- pass
diff --git a/pysam/cfaidx.pyx b/pysam/cfaidx.pyx
index 5338299..a1dc488 100644
--- a/pysam/cfaidx.pyx
+++ b/pysam/cfaidx.pyx
@@ -1,16 +1,53 @@
-# cython: embedsignature=True
+ # cython: embedsignature=True
# cython: profile=True
-# adds doc-strings for sphinx
+###############################################################################
+###############################################################################
+# Cython wrapper for FASTA/FASTQ files based on htslib
+###############################################################################
+# The principal classes defined in this module are:
+#
+# class FastaFile random read/write access to faidx indexed files
+# class FastxFile streamed read/write access to fasta/fastq files
+#
+# Additionally this module defines several additional classes that are part
+# of the internal API. These are:
+#
+# class FastqProxy
+# class PersistentFastqProxy
+#
+# For backwards compatibility, the following classes are also defined:
+#
+# class Fastafile equivalent to FastaFile
+# class FastqFile equivalent to FastxFile
+#
+###############################################################################
+#
+# The MIT License
+#
+# Copyright (c) 2015 Andreas Heger
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+#
+###############################################################################
import sys
import os
-
-cdef class FastqProxy
-cdef makeFastqProxy(kseq_t * src):
- '''enter src into AlignedRead.'''
- cdef FastqProxy dest = FastqProxy.__new__(FastqProxy)
- dest._delegate = src
- return dest
-
+from cpython cimport array
from cpython cimport PyErr_SetString, \
PyBytes_Check, \
@@ -19,68 +56,58 @@ from cpython cimport PyErr_SetString, \
from cpython.version cimport PY_MAJOR_VERSION
-from chtslib cimport \
+from pysam.chtslib cimport \
faidx_nseq, fai_load, fai_destroy, fai_fetch, \
- faidx_fetch_seq, gzopen, gzclose, \
- kseq_init, kseq_destroy, kseq_read
-
+ faidx_seq_len, \
+ faidx_fetch_seq, gzopen, gzclose
-########################################################################
-########################################################################
-########################################################################
-## Python 3 compatibility functions
-########################################################################
-IS_PYTHON3 = PY_MAJOR_VERSION >= 3
+from pysam.cutils cimport force_bytes, force_str, charptr_to_str
+from pysam.cutils cimport encode_filename, from_string_and_size
+from pysam.cutils cimport qualitystring_to_array, parse_region
-# filename encoding (copied from lxml.etree.pyx)
-cdef str _FILENAME_ENCODING
-_FILENAME_ENCODING = sys.getfilesystemencoding()
-if _FILENAME_ENCODING is None:
- _FILENAME_ENCODING = sys.getdefaultencoding()
-if _FILENAME_ENCODING is None:
- _FILENAME_ENCODING = 'ascii'
-
-#cdef char* _C_FILENAME_ENCODING
-#_C_FILENAME_ENCODING = <char*>_FILENAME_ENCODING
-
-cdef bytes _encodeFilename(object filename):
- """Make sure a filename is 8-bit encoded (or None)."""
- if filename is None:
- return None
- elif PyBytes_Check(filename):
- return filename
- elif PyUnicode_Check(filename):
- return filename.encode(_FILENAME_ENCODING)
- else:
- raise TypeError, u"Argument must be string or unicode."
-
-
-
-#####################################################################
-# hard-coded constants
-cdef int max_pos = 2 << 29
+cdef class FastqProxy
+cdef makeFastqProxy(kseq_t * src):
+ '''wrap src in a new FastqProxy (no copy is made).'''
+ cdef FastqProxy dest = FastqProxy.__new__(FastqProxy)
+ dest._delegate = src
+ return dest
## TODO:
## add automatic indexing.
## add function to get sequence names.
cdef class FastaFile:
- '''*(filename)*
+ """Random access to fasta formatted files that
+ have been indexed by :term:`faidx`.
+
+ The file is automatically opened. The index file of file
+ ``<filename>`` is expected to be called ``<filename>.fai``.
+
+ Parameters
+ ----------
+
+ filename : string
+ Filename of fasta file to be opened.
- A *FASTA* file. The file is automatically opened.
+ Raises
+ ------
+
+ ValueError
+ if index file is missing
- This class expects an indexed fasta file and permits
- random access to fasta sequences.
- '''
+ IOError
+ if file could not be opened
- def __cinit__(self, *args, **kwargs ):
+ """
+
+ def __cinit__(self, *args, **kwargs):
self.fastafile = NULL
self._filename = None
self._references = None
self._lengths = None
self.reference2length = None
- self._open( *args, **kwargs )
+ self._open(*args, **kwargs)
- def _isOpen( self ):
+ def is_open(self):
'''return true if samfile has been opened.'''
return self.fastafile != NULL
@@ -97,15 +124,18 @@ cdef class FastaFile:
'''
# close a previously opened file
- if self.fastafile != NULL: self.close()
- self._filename = _encodeFilename(filename)
- self.fastafile = fai_load(self._filename)
+ if self.fastafile != NULL:
+ self.close()
+ self._filename = encode_filename(filename)
+ cdef char *cfilename = self._filename
+ with nogil:
+ self.fastafile = fai_load(cfilename)
if self.fastafile == NULL:
raise IOError("could not open file `%s`" % filename)
# read index
- if not os.path.exists( self._filename + b".fai" ):
+ if not os.path.exists(self._filename + b".fai"):
raise ValueError("could not locate index file")
with open( self._filename + b".fai" ) as inf:
@@ -114,16 +144,32 @@ cdef class FastaFile:
self._lengths = tuple(int(x[1]) for x in data)
self.reference2length = dict(zip(self._references, self._lengths))
- def close( self ):
+ def close(self):
+ """close the file."""
if self.fastafile != NULL:
- fai_destroy( self.fastafile )
+ fai_destroy(self.fastafile)
self.fastafile = NULL
def __dealloc__(self):
self.close()
+ # context manager interface
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.close()
+ return False
+
+ property closed:
+ """bool indicating the current state of the file object.
+ This is a read-only attribute; the close() method changes the value.
+ """
+ def __get__(self):
+ return not self.is_open()
+
property filename:
- '''filename associated with this object.'''
+ """filename associated with this object. This is a read-only attribute."""
def __get__(self):
return self._filename
@@ -133,12 +179,13 @@ cdef class FastaFile:
return self._references
 property nreferences:
- '''number of :term:`reference` sequences in the file.'''
+ """int with the number of :term:`reference` sequences in the file.
+ This is a read-only attribute."""
def __get__(self):
return len(self._references) if self.references else None
property lengths:
- '''tuple with the lengths of :term:`reference` sequences.'''
+ """tuple with the lengths of :term:`reference` sequences."""
def __get__(self):
return self._lengths
@@ -147,84 +194,88 @@ cdef class FastaFile:
start=None,
end=None,
region=None):
+ """fetch sequences in a :term:`region`.
- '''*(reference = None, start = None, end = None, region = None)*
+ A region can
+ either be specified by :term:`reference`, `start` and
+ `end`. `start` and `end` denote 0-based, half-open
+ intervals.
- fetch sequences in a :term:`region` using 0-based indexing.
+ Alternatively, a samtools :term:`region` string can be
+ supplied.
+
+ If any of the coordinates are missing they will be replaced by the
+ minimum (`start`) or maximum (`end`) coordinate.
- The region is specified by :term:`reference`, *start* and *end*.
+ Note that region strings are 1-based, while `start` and `end` denote
+ an interval in python coordinates.
+ The region is specified by :term:`reference`, `start` and `end`.
+
+ Returns
+ -------
- fetch returns an empty string if the region is out of range or
- addresses an unknown *reference*.
+ string : a string with the sequence specified by the region.
- If *reference* is given and *start* is None, the sequence from the
- first base is returned. Similarly, if *end* is None, the sequence
- until the last base is returned.
+ Raises
+ ------
- Alternatively, a samtools :term:`region` string can be supplied.
- '''
+ IndexError
+ if the coordinates are out of range
+
+ ValueError
+ if the region is invalid
- if not self._isOpen():
- raise ValueError( "I/O operation on closed file" )
+ """
+
+ if not self.is_open():
+ raise ValueError("I/O operation on closed file" )
cdef int length
- cdef char * seq
-
- if not region:
- if reference is None:
- raise ValueError('no sequence/region supplied.')
- if start is None:
- start = 0
- if end is None:
- end = max_pos - 1
-
- if start > end:
- raise ValueError(
- 'invalid region: start (%i) > end (%i)' % (start, end))
- if start == end:
- return b""
- # valid ranges are from 0 to 2^29-1
- if not 0 <= start < max_pos:
- raise IndexError('start out of range (%i)' % start)
- if not 0 <= end < max_pos:
- raise IndexError('end out of range (%i)' % end)
- # note: faidx_fetch_seq has a bug such that out-of-range access
- # always returns the last residue. Hence do not use faidx_fetch_seq,
- # but use fai_fetch instead
- # seq = faidx_fetch_seq(self.fastafile,
- # reference,
- # start,
- # end-1,
- # &length)
- region = "%s:%i-%i" % (reference, start+1, end)
- if PY_MAJOR_VERSION >= 3:
- region = region.encode('ascii')
- seq = fai_fetch( self.fastafile,
- region,
- &length )
- else:
- # samtools adds a '\0' at the end
- seq = fai_fetch( self.fastafile, region, &length )
+ cdef char *seq
+ cdef char *ref
+ cdef int rstart, rend
+
+ reference, rstart, rend = parse_region(reference, start, end, region)
+
+ if reference is None:
+ raise ValueError("no sequence/region supplied.")
+
+ if rstart == rend:
+ return ""
+
+ ref = reference
+ length = faidx_seq_len(self.fastafile, ref)
+ if length == -1:
+ raise KeyError("sequence '%s' not present" % reference)
+ if rstart >= length:
+ return ""
+
+ # faidx_fetch_seq adds a '\0' at the end
+ with nogil:
+ seq = faidx_fetch_seq(self.fastafile,
+ ref,
+ rstart,
+ rend-1,
+ &length)
- # copy to python
if seq == NULL:
- return b""
- else:
- try:
- py_seq = seq[:length]
- finally:
- free(seq)
+ raise ValueError(
+ "failure when retrieving sequence on '%s'" % reference)
- return py_seq
+ try:
+ return charptr_to_str(seq)
+ finally:
+ free(seq)
- cdef char * _fetch( self, char * reference, int start, int end, int * length ):
+ cdef char * _fetch(self, char * reference, int start, int end, int * length):
'''fetch sequence for reference, start and end'''
- return faidx_fetch_seq(self.fastafile,
- reference,
- start,
- end-1,
- length )
+ with nogil:
+ return faidx_fetch_seq(self.fastafile,
+ reference,
+ start,
+ end-1,
+ length)
def get_reference_length(self, reference):
'''return the length of reference.'''
@@ -243,80 +294,176 @@ cdef class FastqProxy:
property name:
def __get__(self):
- return self._delegate.name.s
+ return charptr_to_str(self._delegate.name.s)
property sequence:
def __get__(self):
- return self._delegate.seq.s
+ return charptr_to_str(self._delegate.seq.s)
property comment:
def __get__(self):
if self._delegate.comment.l:
- return self._delegate.comment.s
- else: return None
+ return charptr_to_str(self._delegate.comment.s)
+ else:
+ return None
property quality:
def __get__(self):
if self._delegate.qual.l:
- return self._delegate.qual.s
- else: return None
+ return charptr_to_str(self._delegate.qual.s)
+ else:
+ return None
+
+ cdef cython.str tostring(self):
+ if self.comment is None:
+ comment = ""
+ else:
+ comment = " %s" % self.comment
+
+ if self.quality is None:
+ return ">%s%s\n%s" % (self.name, comment, self.sequence)
+ else:
+ return "@%s%s\n%s\n+\n%s" % (self.name, comment,
+ self.sequence, self.quality)
+
+ def __str__(self):
+ return self.tostring()
+
+ cpdef array.array get_quality_array(self, int offset=33):
+ '''return quality values as array after subtracting offset.'''
+ if self.quality is None:
+ return None
+ return qualitystring_to_array(force_bytes(self.quality),
+ offset=offset)
+
+cdef class PersistentFastqProxy:
+ """
+ Python container for pysam.cfaidx.FastqProxy with persistence.
+ Needed to compare multiple fastq records from the same file.
+ """
+ def __init__(self, FastqProxy FastqRead):
+ self.comment = FastqRead.comment
+ self.quality = FastqRead.quality
+ self.sequence = FastqRead.sequence
+ self.name = FastqRead.name
+
+ cdef cython.str tostring(self):
+ if self.comment is None:
+ comment = ""
+ else:
+ comment = " %s" % self.comment
+
+ if self.quality is None:
+ return ">%s%s\n%s" % (self.name, comment, self.sequence)
+ else:
+ return "@%s%s\n%s\n+\n%s" % (self.name, comment,
+ self.sequence, self.quality)
+
+ def __str__(self):
+ return self.tostring()
+
+ cpdef array.array get_quality_array(self, int offset=33):
+ '''return quality values as array after subtracting offset.'''
+ if self.quality is None:
+ return None
+ return qualitystring_to_array(force_bytes(self.quality),
+ offset=offset)
cdef class FastxFile:
- '''*(filename)*
+ """Stream access to :term:`fasta` or :term:`fastq` formatted files.
+
+ The file is automatically opened.
- A :term:`fastq` or :term:`fasta` formatted file. The file
- is automatically opened.
+ Entries in the file can be both fastq or fasta formatted or even a
+ mixture of the two.
- Entries in the file can be both fastq or fasta formatted
- or even a mixture of the two.
+ This file object permits iterating over all entries in the
+ file. Random access is not implemented. The iteration returns
+ objects of type :class:`FastqProxy`
- This file object permits iterating over all entries in
- the file. Random access is not implemented. The iteration
- returns objects of type :class:`FastqProxy`
+ Parameters
+ ----------
- '''
+ filename : string
+ Filename of fasta/fastq file to be opened.
+
+ persist : bool
+
+ If True (default) make a copy of the entry in the file during
+ iteration. If set to False, no copy will be made. This will
+ permit faster iteration, but an entry will not persist when
+ the iteration continues.
+
+ Raises
+ ------
+
+ IOError
+ if file could not be opened
+
+ """
def __cinit__(self, *args, **kwargs):
# self.fastqfile = <gzFile*>NULL
self._filename = None
self.entry = NULL
self._open(*args, **kwargs)
- def _isOpen( self ):
+ def is_open(self):
'''return true if samfile has been opened.'''
return self.entry != NULL
- def _open(self, filename):
- '''open a fastq/fasta file.
+ def _open(self, filename, persist=True):
+ '''open a fastq/fasta file in *filename*
+
+ Parameters
+ ----------
+
+ persist : bool
+
+ if True return a copy of the underlying data (default
+ True). The copy will persist even if the iteration
+ on the file continues.
+
'''
self.close()
if not os.path.exists(filename):
raise IOError("no such file or directory: %s" % filename)
- filename = _encodeFilename(filename)
- self.fastqfile = gzopen(filename, "r")
- self.entry = kseq_init(self.fastqfile)
+ self.persist = persist
+
+ filename = encode_filename(filename)
+ cdef char *cfilename = filename
+ with nogil:
+ self.fastqfile = gzopen(cfilename, "r")
+ self.entry = kseq_init(self.fastqfile)
self._filename = filename
- def close( self ):
- '''close file.'''
+ def close(self):
+ '''close the file.'''
if self.entry != NULL:
gzclose(self.fastqfile)
if self.entry:
kseq_destroy(self.entry)
self.entry = NULL
-
+
def __dealloc__(self):
self.close()
+ property closed:
+ """bool indicating the current state of the file object.
+ This is a read-only attribute; the close() method changes the value.
+ """
+ def __get__(self):
+ return not self.is_open()
+
property filename:
- '''filename associated with this object.'''
+ """string with the filename associated with this object."""
def __get__(self):
return self._filename
def __iter__(self):
- if not self._isOpen():
+ if not self.is_open():
raise ValueError("I/O operation on closed file")
return self
@@ -326,15 +473,19 @@ cdef class FastxFile:
cdef int cnext(self):
'''C version of iterator
'''
- return kseq_read(self.entry)
+ with nogil:
+ return kseq_read(self.entry)
def __next__(self):
"""
python version of next().
"""
cdef int l
- l = kseq_read(self.entry)
+ with nogil:
+ l = kseq_read(self.entry)
if (l > 0):
+ if self.persist:
+ return PersistentFastqProxy(makeFastqProxy(self.entry))
return makeFastqProxy(self.entry)
else:
raise StopIteration
@@ -347,10 +498,9 @@ cdef class FastqFile(FastxFile):
cdef class Fastafile(FastaFile):
pass
-cdef class Fastqfile(FastxFile):
- pass
-
__all__ = ["FastaFile",
"FastqFile",
- "Fastafile",
- "Fastqfile"]
+ "FastxFile",
+ "Fastafile"]
+
+
diff --git a/pysam/chtslib.pxd b/pysam/chtslib.pxd
index d714072..299e84a 100644
--- a/pysam/chtslib.pxd
+++ b/pysam/chtslib.pxd
@@ -6,7 +6,6 @@ from libc.stdio cimport FILE, printf
from posix.types cimport off_t
cdef extern from "Python.h":
- long _Py_HashPointer(void*)
FILE* PyFile_AsFile(object)
@@ -23,6 +22,7 @@ cdef extern from "zlib.h" nogil:
char * gzgets(gzFile file, char *buf, int len)
int gzeof(gzFile file)
+
cdef extern from "htslib/kstring.h" nogil:
ctypedef struct kstring_t:
size_t l, m
@@ -363,7 +363,7 @@ cdef extern from "htslib/hts.h" nogil:
hFILE *hfile
void *voidp
- ctypedef enum htsFormatCategory:
+ cdef enum htsFormatCategory:
unknown_category
sequence_data # Sequence data -- SAM, BAM, CRAM, etc
variant_data # Variant calling data -- VCF, BCF, etc
@@ -371,14 +371,14 @@ cdef extern from "htslib/hts.h" nogil:
region_list # Coordinate intervals or regions -- BED, etc
category_maximum
- ctypedef enum htsExactFormat:
+ cdef enum htsExactFormat:
unknown_format
binary_format
text_format
sam, bam, bai, cram, crai, vcf, bcf, csi, gzi, tbi, bed
format_maximum
- ctypedef enum htsCompression:
+ cdef enum htsCompression:
no_compression, gzip, bgzf, custom
compression_maximum
@@ -951,35 +951,7 @@ cdef extern from "htslib/sam.h" nogil:
# ctypedef bam_pileup1_t * const_bam_pileup1_t_ptr "const bam_pileup1_t *"
-cdef extern from "pysam_stream.h" nogil:
-
- ctypedef struct kstream_t:
- pass
-
- ctypedef struct kseq_t:
- kstring_t name
- kstring_t comment
- kstring_t seq
- kstring_t qual
-
- gzFile gzopen(char *, char *)
- kseq_t *kseq_init(gzFile)
- int kseq_read(kseq_t *)
- void kseq_destroy(kseq_t *)
- int gzclose(gzFile)
-
- kstream_t *ks_init(gzFile)
- void ks_destroy(kstream_t *)
-
- # Retrieve characters from stream until delimiter
- # is reached placing results in str.
- int ks_getuntil(kstream_t *,
- int delimiter,
- kstring_t * str,
- int * dret)
-
-
-cdef extern from "htslib/faidx.h":
+cdef extern from "htslib/faidx.h" nogil:
ctypedef struct faidx_t:
pass
@@ -1702,3 +1674,8 @@ cdef extern from "htslib/vcf.h" nogil:
int bcf_itr_next(htsFile *fp, hts_itr_t *iter, void *r)
hts_idx_t *bcf_index_load(const char *fn)
const char **bcf_index_seqnames(const hts_idx_t *idx, const bcf_hdr_t *hdr, int *nptr)
+
+cdef extern from "htslib_util.h":
+
+ int hts_set_verbosity(int verbosity)
+ int hts_get_verbosity()
diff --git a/pysam/chtslib.pyx b/pysam/chtslib.pyx
index 2f91396..eab229f 100644
--- a/pysam/chtslib.pyx
+++ b/pysam/chtslib.pyx
@@ -1,94 +1,19 @@
# cython: embedsignature=True
# cython: profile=True
# adds doc-strings for sphinx
-import tempfile
-import os
-import sys
-import types
-import itertools
-import struct
-import ctypes
-import collections
-import re
-import platform
-import warnings
-from cpython cimport PyErr_SetString, \
- PyBytes_Check, \
- PyUnicode_Check, \
- PyBytes_FromStringAndSize
+from pysam.chtslib cimport *
-from cpython.version cimport PY_MAJOR_VERSION
-
-########################################################################
-########################################################################
-########################################################################
-## Python 3 compatibility functions
-########################################################################
-IS_PYTHON3 = PY_MAJOR_VERSION >= 3
-
-cdef from_string_and_size(char* s, size_t length):
- if PY_MAJOR_VERSION < 3:
- return s[:length]
- else:
- return s[:length].decode("ascii")
-
-# filename encoding (copied from lxml.etree.pyx)
-cdef str _FILENAME_ENCODING
-_FILENAME_ENCODING = sys.getfilesystemencoding()
-if _FILENAME_ENCODING is None:
- _FILENAME_ENCODING = sys.getdefaultencoding()
-if _FILENAME_ENCODING is None:
- _FILENAME_ENCODING = 'ascii'
-
-#cdef char* _C_FILENAME_ENCODING
-#_C_FILENAME_ENCODING = <char*>_FILENAME_ENCODING
-
-cdef bytes _encodeFilename(object filename):
- """Make sure a filename is 8-bit encoded (or None)."""
- if filename is None:
- return None
- elif PyBytes_Check(filename):
- return filename
- elif PyUnicode_Check(filename):
- return filename.encode(_FILENAME_ENCODING)
- else:
- raise TypeError, u"Argument must be string or unicode."
-
-cdef _forceStr(object s):
- """Return s converted to str type of current Python
- (bytes in Py2, unicode in Py3)"""
- if s is None:
- return None
- if PY_MAJOR_VERSION < 3:
- return s
- elif PyBytes_Check(s):
- return s.decode('ascii')
- else:
- # assume unicode
- return s
-
-cdef bytes _forceBytes(object s):
- u"""convert string or unicode object to bytes, assuming ascii encoding.
+cpdef set_verbosity(int verbosity):
+ u"""Set htslib's hts_verbose global variable to the specified value.
"""
- if PY_MAJOR_VERSION < 3:
- return s
- elif s is None:
- return None
- elif PyBytes_Check(s):
- return s
- elif PyUnicode_Check(s):
- return s.encode('ascii')
- else:
- raise TypeError, u"Argument must be string, bytes or unicode."
+ return hts_set_verbosity(verbosity)
-cdef inline bytes _forceCmdlineBytes(object s):
- return _forceBytes(s)
-
-cdef _charptr_to_str(char* s):
- if PY_MAJOR_VERSION < 3:
- return s
- else:
- return s.decode("ascii")
+cpdef get_verbosity():
+ u"""Return the value of htslib's hts_verbose global variable.
+ """
+ return hts_get_verbosity()
-__all__ = []
+__all__ = [
+ "get_verbosity",
+ "set_verbosity"]
diff --git a/pysam/csamfile.pxd b/pysam/csamfile.pxd
index f027b29..a76a599 100644
--- a/pysam/csamfile.pxd
+++ b/pysam/csamfile.pxd
@@ -1,10 +1,10 @@
-from calignmentfile cimport AlignedSegment, AlignmentFile
+from pysam.calignmentfile cimport AlignedSegment, AlignmentFile
#################################################
# Compatibility Layer for pysam < 0.8
# import all declarations from htslib
-from chtslib cimport *
+from pysam.chtslib cimport *
cdef class AlignedRead(AlignedSegment):
pass
diff --git a/pysam/csamfile.pyx b/pysam/csamfile.pyx
index b76ce89..ed9d79b 100644
--- a/pysam/csamfile.pyx
+++ b/pysam/csamfile.pyx
@@ -19,7 +19,7 @@ from cpython cimport PyErr_SetString, \
from cpython.version cimport PY_MAJOR_VERSION
-from calignmentfile cimport AlignmentFile, AlignedSegment
+from pysam.calignmentfile cimport AlignmentFile, AlignedSegment
cdef class Samfile(AlignmentFile):
diff --git a/pysam/csamtools.pxd b/pysam/csamtools.pxd
index 10c89ba..53e04ea 100644
--- a/pysam/csamtools.pxd
+++ b/pysam/csamtools.pxd
@@ -1,19 +1,8 @@
from libc.stdlib cimport calloc, free
cdef extern from "pysam_util.h":
+
int pysam_dispatch(int argc, char *argv[])
void pysam_set_stderr(int fd)
void pysam_unset_stderr()
-
-cdef extern from "sam.h":
-
- ctypedef struct bam1_t
-
- # functions not actually declared in sam.h, but available
- # as extern
- #
- # implemented in samtools/bam_md.c
- int bam_prob_realn(bam1_t *b, char *ref)
- int bam_cap_mapQ(bam1_t *b, char *ref, int thres)
-
diff --git a/pysam/csamtools.pyx b/pysam/csamtools.pyx
index 7a3dd1f..c49f668 100644
--- a/pysam/csamtools.pyx
+++ b/pysam/csamtools.pyx
@@ -4,35 +4,8 @@
import tempfile
import os
import sys
-import platform
-from cpython cimport PyBytes_Check, PyUnicode_Check
-from cpython.version cimport PY_MAJOR_VERSION
-
-########################################################################
-########################################################################
-########################################################################
-## Python 3 compatibility functions
-########################################################################
-IS_PYTHON3 = PY_MAJOR_VERSION >= 3
-
-cdef bytes _forceBytes(object s):
- u"""convert string or unicode object to bytes, assuming ascii encoding.
- """
- if PY_MAJOR_VERSION < 3:
- return s
- elif s is None:
- return None
- elif PyBytes_Check(s):
- return s
- elif PyUnicode_Check(s):
- return s.encode('ascii')
- else:
- raise TypeError, u"Argument must be string, bytes or unicode."
-
-
-cdef inline bytes _forceCmdlineBytes(object s):
- return _forceBytes(s)
+from pysam.cutils cimport force_bytes, force_cmdline_bytes
class Outs:
'''http://mail.python.org/pipermail/python-list/2000-June/038406.html'''
@@ -71,8 +44,8 @@ class Outs:
def _samtools_dispatch(method,
- args = (),
- catch_stdout = True):
+ args=(),
+ catch_stdout=True):
'''call ``method`` in samtools providing arguments in args.
.. note::
@@ -105,12 +78,12 @@ def _samtools_dispatch(method,
# redirect stderr and stdout to file
stderr_h, stderr_f = tempfile.mkstemp()
pysam_set_stderr(stderr_h)
-
+
if catch_stdout:
stdout_h, stdout_f = tempfile.mkstemp()
try:
- stdout_save = Outs( sys.stdout.fileno() )
- stdout_save.setfd( stdout_h )
+ stdout_save = Outs(sys.stdout.fileno())
+ stdout_save.setfd(stdout_h)
except AttributeError:
# stdout has already been redirected
catch_stdout = False
@@ -121,53 +94,53 @@ def _samtools_dispatch(method,
if method == "view":
if "-o" in args:
raise ValueError("option -o is forbidden in samtools view")
- args = ( "-o", stdout_f ) + args
+ args = ("-o", stdout_f) + args
# do the function call to samtools
cdef char ** cargs
cdef int i, n, retval
n = len(args)
- method = _forceCmdlineBytes(method)
- args = [ _forceCmdlineBytes(a) for a in args ]
+ method = force_cmdline_bytes(method)
+ args = [force_cmdline_bytes(a) for a in args ]
# allocate two more for first (dummy) argument (contains command)
- cargs = <char**>calloc( n+2, sizeof( char *) )
+ cargs = <char**>calloc(n + 2, sizeof(char *))
cargs[0] = "samtools"
cargs[1] = method
- for i from 0 <= i < n: cargs[i+2] = args[i]
+ for i from 0 <= i < n:
+ cargs[i + 2] = args[i]
retval = pysam_dispatch(n+2, cargs)
- free( cargs )
+ free(cargs)
# restore stdout/stderr. This will also flush, so
# needs to be before reading back the file contents
if catch_stdout:
stdout_save.restore()
try:
- with open( stdout_f, "r") as inf:
+ with open(stdout_f, "r") as inf:
out_stdout = inf.readlines()
except UnicodeDecodeError:
with open( stdout_f, "rb") as inf:
# read binary output
out_stdout = inf.read()
- os.remove( stdout_f )
+ os.remove(stdout_f)
else:
out_stdout = []
# get error messages
pysam_unset_stderr()
+ out_stderr = []
try:
- with open( stderr_f, "r") as inf:
+ with open(stderr_f, "r") as inf:
out_stderr = inf.readlines()
except UnicodeDecodeError:
with open( stderr_f, "rb") as inf:
# read binary output
out_stderr = inf.read()
- else:
- out_stderr = []
finally:
- os.remove( stderr_f )
+ os.remove(stderr_f)
return retval, out_stderr, out_stdout
diff --git a/pysam/ctabix.pxd b/pysam/ctabix.pxd
index 31bf7c1..2d7c546 100644
--- a/pysam/ctabix.pxd
+++ b/pysam/ctabix.pxd
@@ -13,8 +13,38 @@ cdef extern from "unistd.h" nogil:
ssize_t read(int fd, void *buf, size_t count)
int close(int fd)
-from chtslib cimport hts_idx_t, hts_itr_t, htsFile, \
- kstream_t, kstring_t, gzFile, tbx_t
+from pysam.chtslib cimport hts_idx_t, hts_itr_t, htsFile, \
+ gzFile, tbx_t, kstring_t
+
+# These functions are put here and not in chtslib.pxd in order
+# to avoid warnings for unused functions.
+cdef extern from "pysam_stream.h" nogil:
+
+ ctypedef struct kstream_t:
+ pass
+
+ ctypedef struct kseq_t:
+ kstring_t name
+ kstring_t comment
+ kstring_t seq
+ kstring_t qual
+
+ gzFile gzopen(char *, char *)
+ kseq_t *kseq_init(gzFile)
+ int kseq_read(kseq_t *)
+ void kseq_destroy(kseq_t *)
+ int gzclose(gzFile)
+
+ kstream_t *ks_init(gzFile)
+ void ks_destroy(kstream_t *)
+
+ # Retrieve characters from stream until delimiter
+ # is reached placing results in str.
+ int ks_getuntil(kstream_t *,
+ int delimiter,
+ kstring_t * str,
+ int * dret)
+
cdef class tabix_file_iterator:
cdef gzFile fh
@@ -45,11 +75,6 @@ cdef class TabixFile:
cdef encoding
-###########################################
-# used by cvcf.pyx
-cdef _force_str(object s, encoding=?)
-
-###########################################
cdef class Parser:
cdef encoding
diff --git a/pysam/ctabix.pyx b/pysam/ctabix.pyx
index 056c21e..58d0ffb 100644
--- a/pysam/ctabix.pyx
+++ b/pysam/ctabix.pyx
@@ -1,5 +1,57 @@
# cython: embedsignature=True
-# adds doc-strings for sphinx
+# cython: profile=True
+###############################################################################
+###############################################################################
+# Cython wrapper for access to tabix indexed files in bgzf format
+###############################################################################
+# The principal classes and functions defined in this module are:
+#
+# class TabixFile class wrapping tabix indexed files in bgzf format
+#
+# class asTuple Parser class for tuples
+# class asGTF Parser class for GTF formatted rows
+# class asBed Parser class for Bed formatted rows
+# class asVCF Parser class for VCF formatted rows
+#
+# class tabix_generic_iterator Streamed iterator of bgzf formatted files
+#
+# Additionally this module defines several additional classes that are part
+# of the internal API. These are:
+#
+# class Parser base class for parsers of tab-separated rows
+# class tabix_file_iterator
+# class TabixIterator iterator class over rows in bgzf file
+# class EmptyIterator
+#
+# For backwards compatibility, the following classes are also defined:
+#
+# class Tabixfile equivalent to TabixFile
+#
+###############################################################################
+#
+# The MIT License
+#
+# Copyright (c) 2015 Andreas Heger
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+# DEALINGS IN THE SOFTWARE.
+#
+###############################################################################
import os
import sys
@@ -14,73 +66,16 @@ from cpython cimport PyErr_SetString, PyBytes_Check, \
from cpython.version cimport PY_MAJOR_VERSION
-cimport TabProxies
+cimport pysam.ctabixproxies as ctabixproxies
-from chtslib cimport htsFile, hts_open, hts_close, HTS_IDX_START,\
- BGZF, bgzf_open, bgzf_close, bgzf_write, \
- ks_init, ks_destroy, gzFile, ks_getuntil, kstring_t, \
+from pysam.chtslib cimport htsFile, hts_open, hts_close, HTS_IDX_START,\
+ BGZF, bgzf_open, bgzf_close, bgzf_write, gzFile, \
tbx_index_build, tbx_index_load, tbx_itr_queryi, tbx_itr_querys, \
tbx_conf_t, tbx_seqnames, tbx_itr_next, tbx_itr_destroy, \
tbx_destroy, gzopen, gzclose, gzerror, gzdopen
-PYTHON3 = PY_MAJOR_VERSION >= 3
-
-# filename encoding (copied from lxml.etree.pyx)
-cdef str _FILENAME_ENCODING
-_FILENAME_ENCODING = sys.getfilesystemencoding()
-if _FILENAME_ENCODING is None:
- _FILENAME_ENCODING = sys.getdefaultencoding()
-if _FILENAME_ENCODING is None:
- _FILENAME_ENCODING = 'ascii'
-
-#cdef char* _C_FILENAME_ENCODING
-#_C_FILENAME_ENCODING = <char*>_FILENAME_ENCODING
-
-cdef inline bytes _encodeFilename(object filename):
- u"""Make sure a filename is 8-bit encoded (or None).
- """
- if filename is None:
- return None
- elif PyBytes_Check(filename):
- return filename
- elif PyUnicode_Check(filename):
- return filename.encode(_FILENAME_ENCODING)
- else:
- raise TypeError, u"Argument must be string or unicode."
-
-cdef inline bytes _force_bytes(object s, encoding="ascii"):
- u"""convert string or unicode object to bytes, assuming ascii encoding.
- """
- if PY_MAJOR_VERSION < 3:
- return s
- elif s is None:
- return None
- elif PyBytes_Check(s):
- return s
- elif PyUnicode_Check(s):
- return s.encode(encoding)
- else:
- raise TypeError, u"Argument must be string, bytes or unicode."
-
-cdef inline _charptr_to_str(char* s, encoding="ascii"):
- if PY_MAJOR_VERSION < 3:
- return s
- else:
- return s.decode(encoding)
-
-cdef _force_str(object s, encoding="ascii"):
- """Return s converted to str type of current Python
- (bytes in Py2, unicode in Py3)"""
- if s is None:
- return None
- if PY_MAJOR_VERSION < 3:
- return s
- elif PyBytes_Check(s):
- return s.decode(encoding)
- else:
- # assume unicode
- return s
-
+from pysam.cutils cimport force_bytes, force_str, charptr_to_str
+from pysam.cutils cimport encode_filename, from_string_and_size
cdef class Parser:
@@ -107,8 +102,8 @@ cdef class asTuple(Parser):
A field in a row is accessed by numeric index.
'''
cdef parse(self, char * buffer, int len):
- cdef TabProxies.TupleProxy r
- r = TabProxies.TupleProxy(self.encoding)
+ cdef ctabixproxies.TupleProxy r
+ r = ctabixproxies.TupleProxy(self.encoding)
# need to copy - there were some
# persistence issues with "present"
r.copy(buffer, len)
@@ -156,8 +151,8 @@ cdef class asGTF(Parser):
'''
cdef parse(self, char * buffer, int len):
- cdef TabProxies.GTFProxy r
- r = TabProxies.GTFProxy(self.encoding)
+ cdef ctabixproxies.GTFProxy r
+ r = ctabixproxies.GTFProxy(self.encoding)
r.copy(buffer, len)
return r
@@ -199,13 +194,13 @@ cdef class asBed(Parser):
+-----------+-----------+------------------------------------------+
Only the first three fields are required. Additional
- fields are optional, but if one is defined, all the preceeding
+ fields are optional, but if one is defined, all the preceding
need to be defined as well.
'''
cdef parse(self, char * buffer, int len):
- cdef TabProxies.BedProxy r
- r = TabProxies.BedProxy(self.encoding)
+ cdef ctabixproxies.BedProxy r
+ r = ctabixproxies.BedProxy(self.encoding)
r.copy(buffer, len)
return r
@@ -245,26 +240,54 @@ cdef class asVCF(Parser):
'''
cdef parse(self, char * buffer, int len):
- cdef TabProxies.VCFProxy r
- r = TabProxies.VCFProxy(self.encoding)
+ cdef ctabixproxies.VCFProxy r
+ r = ctabixproxies.VCFProxy(self.encoding)
r.copy(buffer, len)
return r
cdef class TabixFile:
- '''*(filename, mode='r', parser = None)*
+ """Random access to bgzf formatted files that
+ have been indexed by :term:`tabix`.
- opens a :term:`tabix file` for reading. A missing
- index (*filename* + ".tbi") will raise an exception. *index*
- specifies an alternative name of the index.
+ The file is automatically opened. The index file of file
+ ``<filename>`` is expected to be called ``<filename>.tbi``
+ by default (see parameter `index`).
+
+ Parameters
+ ----------
+
+ filename : string
+ Filename of bgzf file to be opened.
- *parser* sets the default parser for this tabix file. If *parser*
- is None, the results are returned as an unparsed string.
- Otherwise, *parser* is assumed to be a functor that will return
- parsed data (see for example :class:`~pysam.asTuple` and
- :class:`~pysam.asGTF`).
+ index : string
+ The filename of the index. If not set, the default is to
+ assume that the index is called ``filename.tbi``.
- '''
+ mode : char
+ The file opening mode. Currently, only ``r`` is permitted.
+
+ parser : :class:`pysam.Parser`
+
+ sets the default parser for this tabix file. If `parser`
+ is None, the results are returned as an unparsed string.
+ Otherwise, `parser` is assumed to be a functor that will return
+ parsed data (see for example :class:`~pysam.asTuple` and
+ :class:`~pysam.asGTF`).
+
+ encoding : string
+
+ The encoding passed to the parser
+
+ Raises
+ ------
+
+ ValueError
+ if index file is missing.
+
+ IOError
+ if file could not be opened
+ """
def __cinit__(self,
filename,
mode = 'r',
@@ -307,15 +330,21 @@ cdef class TabixFile:
self._filename_index = filename_index
# encode all the strings to pass to tabix
- _encoded_filename = _encodeFilename(filename)
- _encoded_index = _encodeFilename(filename_index)
+ _encoded_filename = encode_filename(filename)
+ _encoded_index = encode_filename(filename_index)
# open file
- self.tabixfile = hts_open(_encoded_filename, 'r')
+ cdef char *cfilename = _encoded_filename
+ with nogil:
+ self.tabixfile = hts_open(cfilename, 'r')
+
if self.tabixfile == NULL:
raise IOError("could not open file `%s`" % filename)
- self.index = tbx_index_load(_encoded_index)
+ cfilename = _encoded_index
+ with nogil:
+ self.index = tbx_index_load(cfilename)
+
if self.index == NULL:
raise IOError("could not open index for `%s`" % filename)
@@ -330,7 +359,7 @@ cdef class TabixFile:
index=self._filename_index,
encoding=self.encoding)
- def _isOpen(self):
+ def is_open(self):
'''return true if samfile has been opened.'''
return self.tabixfile != NULL
@@ -362,7 +391,7 @@ cdef class TabixFile:
some overhead, so beware.
'''
- if not self._isOpen():
+ if not self.is_open():
raise ValueError("I/O operation on closed file")
# convert coordinates to region string
@@ -381,6 +410,7 @@ cdef class TabixFile:
# get iterator
cdef hts_itr_t * iter
+ cdef char *cstr
cdef TabixFile fileobj
# reopen the same file if necessary
@@ -391,13 +421,16 @@ cdef class TabixFile:
if region is None:
# without region or reference - iterate from start
- iter = tbx_itr_queryi(fileobj.index,
- HTS_IDX_START,
- 0,
- 0)
+ with nogil:
+ iter = tbx_itr_queryi(fileobj.index,
+ HTS_IDX_START,
+ 0,
+ 0)
else:
- s = _force_bytes(region, encoding=fileobj.encoding)
- iter = tbx_itr_querys(fileobj.index, s)
+ s = force_bytes(region, encoding=fileobj.encoding)
+ cstr = s
+ with nogil:
+ iter = tbx_itr_querys(fileobj.index, cstr)
if iter == NULL:
if region is None:
@@ -425,27 +458,53 @@ cdef class TabixFile:
return a
+ # context manager interface
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.close()
+ return False
+
###############################################################
###############################################################
###############################################################
## properties
###############################################################
+ property closed:
+ """bool indicating the current state of the file object.
+ This is a read-only attribute; the close() method changes the value.
+ """
+ def __get__(self):
+ return not self.is_open()
+
property filename:
'''filename associated with this object.'''
def __get__(self):
- if not self._isOpen():
+ if not self.is_open():
raise ValueError("I/O operation on closed file")
return self._filename
property header:
'''the file header.
-
+
+ The file header consists of the lines at the beginning of a
+ file that are prefixed by the comment character ``#``.
+
+ .. note::
+ The header is returned as an iterator presenting lines
+ without the newline character.
+
.. note::
- The header is returned as an iterator presenting lines without the
- newline character.
+ The header is only available for local files. For remote
+ files an AttributeError is raised.
+
'''
def __get__(self):
+ if self.isremote:
+ raise AttributeError(
+ "the header is not available for remote files")
return GZIteratorHead(self.filename)
property contigs:
@@ -514,12 +573,13 @@ cdef class TabixIterator:
cdef int retval
while 1:
-
- retval = tbx_itr_next(
- self.tabixfile.tabixfile,
- self.tabixfile.index,
- self.iterator,
- &self.buffer)
+ with nogil:
+ retval = tbx_itr_next(
+ self.tabixfile.tabixfile,
+ self.tabixfile.index,
+ self.iterator,
+ &self.buffer)
+
if retval < 0:
break
@@ -540,7 +600,7 @@ cdef class TabixIterator:
elif retval < 0:
raise StopIteration
- return _charptr_to_str(self.buffer.s, self.encoding)
+ return charptr_to_str(self.buffer.s, self.encoding)
def next(self):
return self.__next__()
@@ -603,8 +663,10 @@ cdef class GZIterator:
if not os.path.exists(filename):
raise IOError("No such file or directory: %s" % filename)
- filename = _encodeFilename(filename)
- self.gzipfile = gzopen(filename, "r")
+ filename = encode_filename(filename)
+ cdef char *cfilename = filename
+ with nogil:
+ self.gzipfile = gzopen(cfilename, "r")
self._filename = filename
self.kstream = ks_init(self.gzipfile)
self.encoding = encoding
@@ -629,7 +691,8 @@ cdef class GZIterator:
cdef int dret = 0
cdef int retval = 0
while 1:
- retval = ks_getuntil(self.kstream, '\n', &self.buffer, &dret)
+ with nogil:
+ retval = ks_getuntil(self.kstream, '\n', &self.buffer, &dret)
if retval < 0:
break
@@ -643,7 +706,7 @@ cdef class GZIterator:
cdef int retval = self.__cnext__()
if retval < 0:
raise StopIteration
- return _force_str(self.buffer.s, self.encoding)
+ return force_str(self.buffer.s, self.encoding)
cdef class GZIteratorHead(GZIterator):
@@ -706,12 +769,14 @@ def tabix_compress(filename_in,
WINDOW_SIZE = 64 * 1024
- fn = _encodeFilename(filename_out)
- fp = bgzf_open( fn, "w")
+ fn = encode_filename(filename_out)
+ cdef char *cfn = fn
+ with nogil:
+ fp = bgzf_open(cfn, "w")
if fp == NULL:
raise IOError("could not open '%s' for writing" % (filename_out, ))
- fn = _encodeFilename(filename_in)
+ fn = encode_filename(filename_in)
fd_src = open(fn, O_RDONLY)
if fd_src == 0:
raise IOError("could not open '%s' for reading" % (filename_in, ))
@@ -720,8 +785,9 @@ def tabix_compress(filename_in,
c = 1
while c > 0:
- c = read(fd_src, buffer, WINDOW_SIZE)
- r = bgzf_write(fp, buffer, c)
+ with nogil:
+ c = read(fd_src, buffer, WINDOW_SIZE)
+ r = bgzf_write(fp, buffer, c)
if r < 0:
free(buffer)
raise OSError("writing failed")
@@ -743,7 +809,7 @@ def tabix_index( filename,
preset = None,
meta_char = "#",
zerobased = False,
- min_shift = -1,
+ int min_shift = -1,
):
'''index tab-separated *filename* using tabix.
@@ -839,8 +905,10 @@ def tabix_index( filename,
conf.preset, conf.sc, conf.bc, conf.ec, conf.meta_char, conf.line_skip = conf_data
- fn = _encodeFilename(filename)
- tbx_index_build(fn, min_shift, &conf)
+ fn = encode_filename(filename)
+ cdef char *cfn = fn
+ with nogil:
+ tbx_index_build(cfn, min_shift, &conf)
return filename
@@ -967,8 +1035,8 @@ cdef class tabix_file_iterator:
cdef int dret = 0
cdef int retval = 0
while 1:
-
- retval = ks_getuntil(self.kstream, '\n', &self.buffer, &dret)
+ with nogil:
+ retval = ks_getuntil(self.kstream, '\n', &self.buffer, &dret)
if retval < 0:
break
@@ -1038,7 +1106,7 @@ class tabix_generic_iterator:
if not line:
break
- s = _force_bytes(line, encoding)
+ s = force_bytes(line, encoding)
b = s
nbytes = len(line)
assert b[nbytes] == '\0'
@@ -1076,14 +1144,14 @@ def tabix_iterator(infile, parser):
:class:`~pysam.asGTF`).
"""
- if PYTHON3:
+ if PY_MAJOR_VERSION >= 3:
return tabix_generic_iterator(infile, parser)
else:
return tabix_file_iterator(infile, parser)
# file objects can use C stdio
# used to be: isinstance( infile, file):
- # if PYTHON3:
+ # if PY_MAJOR_VERSION >= 3:
# if isinstance( infile, io.IOBase ):
# return tabix_copy_iterator( infile, parser )
# else:
diff --git a/pysam/ctabixproxies.pxd b/pysam/ctabixproxies.pxd
new file mode 100644
index 0000000..5317b81
--- /dev/null
+++ b/pysam/ctabixproxies.pxd
@@ -0,0 +1,59 @@
+#cdef extern from "Python.h":
+# ctypedef struct FILE
+
+from libc.stdint cimport uint8_t, int32_t, uint32_t, int64_t, uint64_t
+
+cdef class TupleProxy:
+
+ cdef:
+ char * data
+ char ** fields
+ int nfields
+ int index
+ int nbytes
+ int offset
+ bint is_modified
+
+ cdef encoding
+
+ cpdef int getMaxFields(self)
+ cpdef int getMinFields(self)
+# cdef char * _getindex(self, int idx)
+
+ cdef take(self, char * buffer, size_t nbytes)
+ cdef present(self, char * buffer, size_t nbytes)
+ cdef copy(self, char * buffer, size_t nbytes, bint reset=*)
+ cdef update(self, char * buffer, size_t nbytes)
+
+cdef class GTFProxy(TupleProxy) :
+
+ cdef:
+ char * _attributes
+ cdef bint hasOwnAttributes
+
+ cpdef int getMaxFields(self)
+ cpdef int getMinFields(self)
+ cdef char * getAttributes(self)
+
+cdef class NamedTupleProxy(TupleProxy):
+ pass
+
+cdef class BedProxy(NamedTupleProxy):
+
+ cdef:
+ char * contig
+ uint32_t start
+ uint32_t end
+ int bedfields
+
+ cpdef int getMaxFields(self)
+ cpdef int getMinFields(self)
+ cdef update(self, char * buffer, size_t nbytes)
+
+cdef class VCFProxy(NamedTupleProxy) :
+
+ cdef:
+ char * contig
+ uint32_t pos
+
+ cdef update(self, char * buffer, size_t nbytes)
diff --git a/pysam/TabProxies.pyx b/pysam/ctabixproxies.pyx
similarity index 87%
rename from pysam/TabProxies.pyx
rename to pysam/ctabixproxies.pyx
index 0add831..d72f082 100644
--- a/pysam/TabProxies.pyx
+++ b/pysam/ctabixproxies.pyx
@@ -1,72 +1,12 @@
-import types
-import sys
-import string
+from cpython cimport PyBytes_FromStringAndSize
-from cpython.version cimport PY_MAJOR_VERSION
-from cpython cimport PyErr_SetString, PyBytes_Check
-from cpython cimport PyUnicode_Check, PyBytes_FromStringAndSize
+from libc.stdio cimport printf, feof, fgets
+from libc.string cimport strcpy, strlen, memcmp, memcpy, memchr, strstr, strchr
+from libc.stdlib cimport free, malloc, calloc, realloc
+from libc.stdlib cimport atoi, atol, atof
-from libc.stdio cimport printf
-
-
-cdef from_string_and_size(char* s, size_t length):
- if PY_MAJOR_VERSION < 3:
- return s[:length]
- else:
- return s[:length].decode("ascii")
-
-# filename encoding (copied from lxml.etree.pyx)
-cdef str _FILENAME_ENCODING
-_FILENAME_ENCODING = sys.getfilesystemencoding()
-if _FILENAME_ENCODING is None:
- _FILENAME_ENCODING = sys.getdefaultencoding()
-if _FILENAME_ENCODING is None:
- _FILENAME_ENCODING = 'ascii'
-
-cdef bytes _force_bytes(object s, encoding="ascii"):
- u"""convert string or unicode object to bytes, assuming ascii encoding.
- """
- if PY_MAJOR_VERSION < 3:
- return s
- elif s is None:
- return None
- elif PyBytes_Check(s):
- return s
- elif PyUnicode_Check(s):
- return s.encode(encoding)
- else:
- raise TypeError, u"Argument must be string, bytes or unicode."
-
-cdef inline bytes _force_cmdline_bytes(object s):
- return _force_bytes(s)
-
-cdef _charptr_to_str(char* s, encoding="ascii"):
- if PY_MAJOR_VERSION < 3:
- return s
- else:
- return s.decode(encoding)
-
-cdef inline _force_str(object s, encoding="ascii"):
- """Return s converted to str type of current Python "
- "(bytes in Py2, unicode in Py3)"""
- if s is None:
- return None
- if PY_MAJOR_VERSION < 3:
- return s
- elif PyBytes_Check(s):
- return s.decode(encoding)
- else:
- # assume unicode
- return s
-
-cdef char * nextItem(char * buffer):
- cdef char * pos
- pos = strchr(buffer, '\t')
- if pos == NULL:
- raise ValueError("malformatted entry at %s" % buffer)
- pos[0] = '\0'
- pos += 1
- return pos
+from pysam.cutils cimport force_bytes, force_str, charptr_to_str
+from pysam.cutils cimport encode_filename, from_string_and_size
cdef char *StrOrEmpty(char * buffer):
if buffer == NULL:
@@ -74,9 +14,13 @@ cdef char *StrOrEmpty(char * buffer):
else: return buffer
cdef int isNew(char * p, char * buffer, size_t nbytes):
- if p == NULL:
- return 0
- return not (buffer <= p < buffer + nbytes)
+ """return True if `p` is not NULL and is located outside of
+ `buffer` of size `nbytes`
+ """
+ if p == NULL:
+ return 0
+ return not (buffer <= p < buffer + nbytes)
+
cdef class TupleProxy:
'''Proxy class for access to parsed row as a tuple.
@@ -113,6 +57,39 @@ cdef class TupleProxy:
if self.fields != NULL:
free(self.fields)
+ def __copy__(self):
+ if self.is_modified:
+ raise NotImplementedError(
+ "copying modified tuples is not implemented")
+ cdef TupleProxy n = type(self)()
+ n.copy(self.data, self.nbytes, reset=True)
+ return n
+
+ def compare(self, TupleProxy other):
+ '''return -1,0,1, if contents in this are binary
+ <,=,> to *other*
+
+ '''
+ if self.is_modified or other.is_modified:
+ raise NotImplementedError(
+ 'comparison of modified TupleProxies is not implemented')
+ if self.data == other.data:
+ return 0
+
+ if self.nbytes < other.nbytes:
+ return -1
+ elif self.nbytes > other.nbytes:
+ return 1
+ return memcmp(self.data, other.data, self.nbytes)
+
+ def __richcmp__(self, TupleProxy other, int op):
+ if op == 2: # == operator
+ return self.compare(other) == 0
+ elif op == 3: # != operator
+ return self.compare(other) != 0
+ else:
+ return NotImplemented
+
cdef take(self, char * buffer, size_t nbytes):
'''start presenting buffer.
@@ -129,30 +106,34 @@ cdef class TupleProxy:
'''
self.update(buffer, nbytes)
- cdef copy(self, char * buffer, size_t nbytes):
+ cdef copy(self, char * buffer, size_t nbytes, bint reset=False):
'''start presenting buffer of size *nbytes*.
Buffer is a '\0'-terminated string without the '\n'.
Take a copy of buffer.
'''
- cdef int s
# +1 for '\0'
- s = sizeof(char) * (nbytes + 1)
+ cdef int s = sizeof(char) * (nbytes + 1)
self.data = <char*>malloc(s)
if self.data == NULL:
raise ValueError("out of memory in TupleProxy.copy()")
- self.nbytes = nbytes
memcpy(<char*>self.data, buffer, s)
+
+ if reset:
+ for x from 0 <= x < nbytes:
+ if self.data[x] == '\0':
+ self.data[x] = '\t'
+
self.update(self.data, nbytes)
- cdef int getMinFields(self):
+ cpdef int getMinFields(self):
'''return minimum number of fields.'''
# 1 is not a valid tabix entry, but TupleProxy
# could be more generic.
return 1
- cdef int getMaxFields(self):
+ cpdef int getMaxFields(self):
'''return maximum number of fields. Return
0 for unknown length.'''
return 0
@@ -180,7 +161,7 @@ cdef class TupleProxy:
assert strlen(buffer) == nbytes, \
"length of buffer (%i) != number of bytes (%i)" % (
- strlen(buffer), nbytes)
+ strlen(buffer), nbytes)
if buffer[nbytes] != 0:
raise ValueError("incomplete line at %s" % buffer)
@@ -263,7 +244,7 @@ cdef class TupleProxy:
raise IndexError(
"list index out of range %i >= %i" %
(i, self.nfields))
- return _force_str(self.fields[i], self.encoding)
+ return force_str(self.fields[i], self.encoding)
def __getitem__(self, key):
if type(key) == int:
@@ -293,7 +274,7 @@ cdef class TupleProxy:
return
# conversion with error checking
- value = _force_bytes(value)
+ value = force_bytes(value)
cdef char * tmp = <char*>value
self.fields[idx] = <char*>malloc((strlen( tmp ) + 1) * sizeof(char))
if self.fields[idx] == NULL:
@@ -326,11 +307,12 @@ cdef class TupleProxy:
if retval == NULL:
return None
else:
- return _force_str(retval, self.encoding)
+ return force_str(retval, self.encoding)
def __str__(self):
'''return original data'''
# copy and replace \0 bytes with \t characters
+ cdef char * cpy
if self.is_modified:
# todo: treat NULL values
result = []
@@ -359,7 +341,7 @@ def toDot(v):
def quote(v):
'''return a quoted attribute.'''
- if type(v) in types.StringTypes:
+ if isinstance(v, str):
return '"%s"' % v
else:
return str(v)
@@ -388,11 +370,11 @@ cdef class GTFProxy(TupleProxy):
if self.hasOwnAttributes:
free(self._attributes)
- cdef int getMinFields(self):
+ cpdef int getMinFields(self):
'''return minimum number of fields.'''
return 9
- cdef int getMaxFields(self):
+ cpdef int getMaxFields(self):
'''return max number of fields.'''
return 9
@@ -495,7 +477,9 @@ cdef class GTFProxy(TupleProxy):
# ...; transcript_name "TXNRD2;-001"; ....
# The current heuristic is to split on a semicolon followed by a
# space, see also http://mblab.wustl.edu/GTF22.html
- fields = [x.strip() for x in attributes.split("; ")]
+
+ # Remove white space to prevent a last empty field.
+ fields = [x.strip() for x in attributes.strip().split("; ")]
result = {}
@@ -507,7 +491,7 @@ cdef class GTFProxy(TupleProxy):
# split at most once in order to avoid separating
# multi-word values
- d = [x.strip() for x in string.split(f, " ", maxsplit=1)]
+ d = [x.strip() for x in f.split(" ", 1)]
n,v = d[0], d[1]
if len(d) > 2:
@@ -540,7 +524,7 @@ cdef class GTFProxy(TupleProxy):
aa = []
for k,v in d.items():
- if type(v) in types.StringTypes:
+ if isinstance(v, str):
aa.append( '%s "%s"' % (k,v) )
else:
aa.append( '%s %s' % (k,str(v)) )
@@ -624,7 +608,7 @@ cdef class GTFProxy(TupleProxy):
# add space in order to make sure
# to not pick up a field that is a prefix of another field
- r = _force_bytes(item + " ")
+ r = force_bytes(item + " ")
query = r
start = strstr(attributes, query)
@@ -642,11 +626,11 @@ cdef class GTFProxy(TupleProxy):
while end[0] != '\0' and end[0] != '"':
end += 1
l = end - start
- result = _force_str(PyBytes_FromStringAndSize(start, l),
+ result = force_str(PyBytes_FromStringAndSize(start, l),
self.encoding)
return result
else:
- return _force_str(start, self.encoding)
+ return force_str(start, self.encoding)
def setAttribute(self, name, value):
'''convenience method to set an attribute.'''
@@ -673,7 +657,7 @@ cdef class NamedTupleProxy(TupleProxy):
if self.nfields < idx:
raise KeyError("field %s not set" % key)
if f == str:
- return _force_str(self.fields[idx],
+ return force_str(self.fields[idx],
self.encoding)
return f(self.fields[idx])
@@ -697,11 +681,11 @@ cdef class BedProxy(NamedTupleProxy):
'blockSizes': (10, str),
'blockStarts': (11, str), }
- cdef int getMinFields(self):
+ cpdef int getMinFields(self):
'''return minimum number of fields.'''
return 3
- cdef int getMaxFields(self):
+ cpdef int getMaxFields(self):
'''return max number of fields.'''
return 12
@@ -736,14 +720,16 @@ cdef class BedProxy(NamedTupleProxy):
cdef int save_fields = self.nfields
# ensure fields to use correct format
self.nfields = self.bedfields
- retval = TupleProxy.__str__( self )
+ retval = TupleProxy.__str__(self)
self.nfields = save_fields
return retval
def __setattr__(self, key, value ):
'''set attribute.'''
- if key == "start": self.start = value
- elif key == "end": self.end = value
+ if key == "start":
+ self.start = value
+ elif key == "end":
+ self.end = value
cdef int idx
idx, f = self.map_key2field[key]
diff --git a/pysam/cutils.pxd b/pysam/cutils.pxd
new file mode 100644
index 0000000..c2a7c5f
--- /dev/null
+++ b/pysam/cutils.pxd
@@ -0,0 +1,27 @@
+#########################################################################
+# Utility functions used across pysam
+#########################################################################
+cimport cython
+from cpython cimport array as c_array
+
+cpdef parse_region(reference=*, start=*, end=*, region=*)
+
+#########################################################################
+# Utility functions for quality string conversions
+
+cpdef c_array.array qualitystring_to_array(bytes input_str, int offset=*)
+cpdef array_to_qualitystring(c_array.array arr, int offset=*)
+cpdef qualities_to_qualitystring(qualities, int offset=*)
+
+########################################################################
+########################################################################
+########################################################################
+## Python 3 compatibility functions
+########################################################################
+cdef charptr_to_str(char *s, encoding=*)
+cdef force_str(object s, encoding=*)
+cdef bytes force_bytes(object s, encoding=*)
+cdef bytes force_cmdline_bytes(object s, encoding=*)
+cdef bytes encode_filename(object filename)
+cdef from_string_and_size(char *s, size_t length)
+
diff --git a/pysam/cutils.pyx b/pysam/cutils.pyx
new file mode 100644
index 0000000..afbd97d
--- /dev/null
+++ b/pysam/cutils.pyx
@@ -0,0 +1,214 @@
+import types
+import sys
+import string
+import re
+
+from cpython.version cimport PY_MAJOR_VERSION
+from cpython cimport PyBytes_Check, PyUnicode_Check
+
+from cpython cimport array as c_array
+cimport cython
+
+#####################################################################
+# hard-coded constants
+cdef int MAX_POS = 2 << 29
+
+#################################################################
+# Utility functions for quality string conversions
+cpdef c_array.array qualitystring_to_array(bytes input_str, int offset=33):
+ """convert a qualitystring to an array of quality values."""
+ if input_str is None:
+ return None
+ cdef char i
+ return c_array.array('B', [i - offset for i in input_str])
+
+
+cpdef array_to_qualitystring(c_array.array qualities, int offset=33):
+ """convert an array of quality values to a string."""
+ if qualities is None:
+ return None
+ cdef int x
+
+ cdef c_array.array result
+ result = c_array.clone(qualities, len(qualities), zero=False)
+
+ for x from 0 <= x < len(qualities):
+ result[x] = qualities[x] + offset
+ return result.tostring()
+
+
+cpdef qualities_to_qualitystring(qualities, int offset=33):
+ """convert a list or array of quality scores to the string
+ representation used in the SAM format.
+
+ Parameters
+ ----------
+ offset : int
+ offset to be added to the quality scores to arrive at
+ the characters of the quality string (default=33).
+
+ Returns
+ -------
+ string
+ a quality string
+
+ """
+ cdef char x
+ if qualities is None:
+ return None
+ elif isinstance(qualities, c_array.array):
+ return array_to_qualitystring(qualities, offset=offset)
+ else:
+ # tuples and lists
+ return "".join([chr(x + offset) for x in qualities])
+
+
+########################################################################
+########################################################################
+########################################################################
+## Python 3 compatibility functions
+########################################################################
+IS_PYTHON3 = PY_MAJOR_VERSION >= 3
+
+cdef from_string_and_size(char* s, size_t length):
+ if PY_MAJOR_VERSION < 3:
+ return s[:length]
+ else:
+ return s[:length].decode("ascii")
+
+# filename encoding (copied from lxml.etree.pyx)
+cdef str _FILENAME_ENCODING
+_FILENAME_ENCODING = sys.getfilesystemencoding()
+if _FILENAME_ENCODING is None:
+ _FILENAME_ENCODING = sys.getdefaultencoding()
+if _FILENAME_ENCODING is None:
+ _FILENAME_ENCODING = 'ascii'
+
+#cdef char* _C_FILENAME_ENCODING
+#_C_FILENAME_ENCODING = <char*>_FILENAME_ENCODING
+
+cdef bytes encode_filename(object filename):
+ """Make sure a filename is 8-bit encoded (or None)."""
+ if filename is None:
+ return None
+ elif PyBytes_Check(filename):
+ return filename
+ elif PyUnicode_Check(filename):
+ return filename.encode(_FILENAME_ENCODING)
+ else:
+ raise TypeError(u"Argument must be string or unicode.")
+
+cdef bytes force_bytes(object s, encoding="ascii"):
+ u"""convert string or unicode object to bytes, assuming
+ ascii encoding.
+ """
+ if PY_MAJOR_VERSION < 3:
+ return s
+ elif s is None:
+ return None
+ elif PyBytes_Check(s):
+ return s
+ elif PyUnicode_Check(s):
+ return s.encode(encoding)
+ else:
+ raise TypeError(u"Argument must be string, bytes or unicode.")
+
+cdef bytes force_cmdline_bytes(object s, encoding="ascii"):
+ return force_bytes(s)
+
+cdef charptr_to_str(char* s, encoding="ascii"):
+ if s == NULL:
+ return None
+ if PY_MAJOR_VERSION < 3:
+ return s
+ else:
+ return s.decode(encoding)
+
+cdef force_str(object s, encoding="ascii"):
+ """Return s converted to str type of current Python
+ (bytes in Py2, unicode in Py3)"""
+ if s is None:
+ return None
+ if PY_MAJOR_VERSION < 3:
+ return s
+ elif PyBytes_Check(s):
+ return s.decode(encoding)
+ else:
+ # assume unicode
+ return s
+
+cpdef parse_region(reference=None,
+ start=None,
+ end=None,
+ region=None):
+ """parse alternative ways to specify a genomic region. A region can
+ either be specified by :term:`reference`, `start` and
+ `end`. `start` and `end` denote 0-based, half-open
+ intervals.
+
+ Alternatively, a samtools :term:`region` string can be
+ supplied.
+
+ If any of the coordinates are missing they will be replaced by the
+ minimum (`start`) or maximum (`end`) coordinate.
+
+ Note that region strings are 1-based, while `start` and `end` denote
+ an interval in python coordinates.
+
+ Returns
+ -------
+
+ tuple : a tuple of `reference`, `start` and `end`.
+
+ Raises
+ ------
+
+ ValueError
+ for invalid or out of bounds regions.
+
+ """
+ cdef int rtid
+ cdef long long rstart
+ cdef long long rend
+
+ rtid = -1
+ rstart = 0
+ rend = MAX_POS
+ if start != None:
+ try:
+ rstart = start
+ except OverflowError:
+ raise ValueError('start out of range (%i)' % start)
+
+ if end != None:
+ try:
+ rend = end
+ except OverflowError:
+ raise ValueError('end out of range (%i)' % end)
+
+ if region:
+ region = force_str(region)
+ parts = re.split("[:-]", region)
+ reference = parts[0]
+ if len(parts) >= 2:
+ rstart = int(parts[1]) - 1
+ if len(parts) >= 3:
+ rend = int(parts[2])
+
+ if not reference:
+ return None, 0, 0
+
+ if not 0 <= rstart < MAX_POS:
+ raise ValueError('start out of range (%i)' % rstart)
+ if not 0 <= rend <= MAX_POS:
+ raise ValueError('end out of range (%i)' % rend)
+ if rstart > rend:
+ raise ValueError(
+ 'invalid region: start (%i) > end (%i)' % (rstart, rend))
+
+ return force_bytes(reference), rstart, rend
+
+
+__all__ = ["qualitystring_to_array",
+ "array_to_qualitystring",
+ "qualities_to_qualitystring"]
diff --git a/pysam/cvcf.pxd b/pysam/cvcf.pxd
index a583d99..139597f 100644
--- a/pysam/cvcf.pxd
+++ b/pysam/cvcf.pxd
@@ -1,42 +1,2 @@
-cdef extern from "stdlib.h":
- void free(void *)
- void *malloc(size_t)
- void *calloc(size_t,size_t)
- void *realloc(void *,size_t)
- int c_abs "abs" (int)
- int c_abs "abs" (int)
- int atoi( char *nptr)
- long atol( char *nptr)
- double atof( char *nptr)
-
-cdef extern from "Python.h":
- ctypedef struct FILE
- FILE* PyFile_AsFile(object)
- char *fgets(char *str, int size, FILE *ifile)
- int feof(FILE *stream)
- size_t strlen(char *s)
- size_t getline(char **lineptr, size_t *n, FILE *stream)
- char *strstr(char *, char *)
- char *strchr(char *string, int c)
- int fileno(FILE *stream)
-
-cdef extern from "string.h":
- int strcmp(char *s1, char *s2)
- int strncmp(char *s1,char *s2,size_t len)
- char *strcpy(char *dest,char *src)
- char *strncpy(char *dest,char *src, size_t len)
- char *strdup(char *)
- char *strcat(char *,char *)
- size_t strlen(char *s)
- int memcmp( void * s1, void *s2, size_t len )
- void *memcpy(void *dest, void *src, size_t n)
- void *memchr(void *s, int c, size_t n)
-
-cdef extern from "stdint.h":
- ctypedef int int64_t
- ctypedef int int32_t
- ctypedef int uint32_t
- ctypedef int uint8_t
- ctypedef int uint64_t
diff --git a/pysam/cvcf.pyx b/pysam/cvcf.pyx
index 5feb2a6..e9fe3d0 100644
--- a/pysam/cvcf.pyx
+++ b/pysam/cvcf.pyx
@@ -1,3 +1,4 @@
+# cython: embedsignature=True
#
# Code to read, write and edit VCF files
#
@@ -39,6 +40,7 @@
#
# NOTE: the position that is returned to Python is 0-based, NOT
# 1-based as in the VCF file.
+# NOTE: There is also preliminary VCF functionality in the VariantFile class.
#
# TODO:
# only v4.0 writing is complete; alleles are not converted to v3.3 format
@@ -48,8 +50,14 @@ from collections import namedtuple, defaultdict
from operator import itemgetter
import sys, re, copy, bisect
-cimport ctabix
-cimport TabProxies
+from libc.stdlib cimport atoi
+from libc.stdint cimport int8_t, int16_t, int32_t, int64_t
+from libc.stdint cimport uint8_t, uint16_t, uint32_t, uint64_t
+
+cimport pysam.ctabix as ctabix
+cimport pysam.ctabixproxies as ctabixproxies
+
+from pysam.cutils cimport force_str
import pysam
@@ -83,43 +91,45 @@ def parse_regions( string ):
raise ValueError("Don't understand region string '%s'" % r)
result.append( (chrom,start,end) )
return result
-
+
FORMAT = namedtuple('FORMAT','id numbertype number type description missingvalue')
###########################################################################################################
-#
+#
# New class
-#
+#
###########################################################################################################
-cdef class VCFRecord( TabProxies.TupleProxy):
+cdef class VCFRecord( ctabixproxies.TupleProxy):
'''vcf record.
- initialized from data and vcf meta
+ initialized from data and vcf meta
'''
-
+
cdef vcf
cdef char * contig
cdef uint32_t pos
def __init__(self, vcf):
self.vcf = vcf
+ self.encoding = vcf.encoding
# if len(data) != len(self.vcf._samples):
# self.vcf.error(str(data),
- # self.BAD_NUMBER_OF_COLUMNS,
+ # self.BAD_NUMBER_OF_COLUMNS,
# "expected %s for %s samples (%s), got %s" % \
- # (len(self.vcf._samples),
- # len(self.vcf._samples),
- # self.vcf._samples,
+ # (len(self.vcf._samples),
+ # len(self.vcf._samples),
+ # self.vcf._samples,
# len(data)))
-
- def __cinit__(self, vcf):
+
+ def __cinit__(self, vcf):
# start indexed access at genotypes
self.offset = 9
-
+
self.vcf = vcf
-
+ self.encoding = vcf.encoding
+
def error(self, line, error, opt=None):
'''raise error.'''
# pass to vcf file for error handling
@@ -127,10 +137,10 @@ cdef class VCFRecord( TabProxies.TupleProxy):
cdef update(self, char * buffer, size_t nbytes):
'''update internal data.
-
+
nbytes does not include the terminal '\0'.
'''
- TabProxies.TupleProxy.update(self, buffer, nbytes)
+ ctabixproxies.TupleProxy.update(self, buffer, nbytes)
self.contig = self.fields[0]
# vcf counts from 1 - correct here
@@ -149,13 +159,13 @@ cdef class VCFRecord( TabProxies.TupleProxy):
def __get__(self): return self.fields[2]
property ref:
- def __get__(self):
+ def __get__(self):
return self.fields[3]
property alt:
def __get__(self):
# convert v3.3 to v4.0 alleles below
- alt = self.fields[4]
+ alt = self.fields[4]
if alt == ".": alt = []
else: alt = alt.upper().split(',')
return alt
@@ -164,7 +174,7 @@ cdef class VCFRecord( TabProxies.TupleProxy):
def __get__(self):
qual = self.fields[5]
if qual == b".": qual = -1
- else:
+ else:
try: qual = float(qual)
except: self.vcf.error(str(self),self.QUAL_NOT_NUMERICAL)
return qual
@@ -187,7 +197,7 @@ cdef class VCFRecord( TabProxies.TupleProxy):
if len(elts) == 1: v = None
elif len(elts) == 2: v = elts[1]
else: self.vcf.error(str(self),self.ERROR_INFO_STRING)
- info[elts[0]] = self.vcf.parse_formatdata(elts[0], v, self.vcf._info, str(self))
+ info[elts[0]] = self.vcf.parse_formatdata(elts[0], v, self.vcf._info, str(self.vcf))
return info
property format:
@@ -199,7 +209,7 @@ cdef class VCFRecord( TabProxies.TupleProxy):
return self.vcf._samples
def __getitem__(self, key):
-
+
# parse sample columns
values = self.fields[self.vcf._sample2column[key]].split(':')
alt = self.alt
@@ -225,9 +235,9 @@ cdef class VCFRecord( TabProxies.TupleProxy):
result[format[idx]] = result[format[idx]][:expected]
return result
-
-cdef class asVCFRecord(ctabix.Parser):
+
+cdef class asVCFRecord(ctabix.Parser):
'''converts a :term:`tabix row` into a VCF record.'''
cdef vcffile
def __init__(self, vcffile):
@@ -351,10 +361,10 @@ class VCF(object):
def parse_format(self,line,format,filter=False):
if self._version == 40:
- if not format.startswith('<'):
+ if not format.startswith('<'):
self.error(line,self.V40_MISSING_ANGLE_BRACKETS)
format = "<"+format
- if not format.endswith('>'):
+ if not format.endswith('>'):
self.error(line,self.V40_MISSING_ANGLE_BRACKETS)
format += ">"
format = format[1:-1]
@@ -372,9 +382,9 @@ class VCF(object):
elif first.startswith('Type='): data['type'] = first.split('=')[1]
elif first.startswith('Description='):
elts = format.split('"')
- if len(elts)<3:
+ if len(elts)<3:
self.error(line,self.FORMAT_MISSING_QUOTES)
- elts = first.split('=') + [rest]
+ elts = first.split('=') + [rest]
data['descr'] = elts[1]
rest = '"'.join(elts[2:])
if rest.startswith(','): rest = rest[1:]
@@ -384,7 +394,7 @@ class VCF(object):
idx += 1
if filter and idx==1: idx=3 # skip number and type fields for FILTER format strings
if not data['id']: self.error(line,self.BADLY_FORMATTED_FORMAT_STRING)
- if 'descr' not in data:
+ if 'descr' not in data:
# missing description
self.error(line,self.BADLY_FORMATTED_FORMAT_STRING)
data['descr'] = ""
@@ -420,7 +430,7 @@ class VCF(object):
data['type'] = 'Flag'
return FORMAT(data['id'],t,n,data['type'],data['descr'],data['missing'])
-
+
def format_format( self, fmt, filter=False ):
values = [('ID',fmt.id)]
if fmt.number != None and not filter:
@@ -481,7 +491,7 @@ class VCF(object):
for idx,v in enumerate(data[k]):
if v == format[k].missingvalue: data[k][idx] = "."
# make sure GT comes first; and ensure fixed ordering; also convert GT data back to string
- for k in data:
+ for k in data:
if k != 'GT': sdata.append( (k,data[k]) )
sdata.sort()
if 'GT' in data:
@@ -507,12 +517,12 @@ class VCF(object):
for f in [FORMAT('GT',self.NT_NUMBER,1,'String','Genotype','.'),
FORMAT('DP',self.NT_NUMBER,1,'Integer','Read depth at this position for this sample',-1),
FORMAT('FT',self.NT_NUMBER,1,'String','Sample Genotype Filter','.'),
- FORMAT('GL',self.NT_UNKNOWN,-1,'Float','Genotype likelihoods','.'),
- FORMAT('GLE',self.NT_UNKNOWN,-1,'Float','Genotype likelihoods','.'),
+ FORMAT('GL',self.NT_UNKNOWN,-1,'Float','Genotype likelihoods','.'),
+ FORMAT('GLE',self.NT_UNKNOWN,-1,'Float','Genotype likelihoods','.'),
FORMAT('GQ',self.NT_NUMBER,1,'Integer','Genotype Quality',-1),
FORMAT('PL',self.NT_GENOTYPES,-1,'Integer','Phred-scaled genotype likelihoods', '.'),
- FORMAT('GP',self.NT_GENOTYPES,-1,'Float','Genotype posterior probabilities','.'),
- FORMAT('GQ',self.NT_GENOTYPES,-1,'Integer','Conditional genotype quality','.'),
+ FORMAT('GP',self.NT_GENOTYPES,-1,'Float','Genotype posterior probabilities','.'),
+ FORMAT('GQ',self.NT_GENOTYPES,-1,'Integer','Conditional genotype quality','.'),
FORMAT('HQ',self.NT_UNKNOWN,-1,'Integer','Haplotype Quality',-1), # unknown number, since may be haploid
FORMAT('PS',self.NT_UNKNOWN,-1,'Integer','Phase set','.'),
FORMAT('PQ',self.NT_NUMBER,1,'Integer','Phasing quality',-1),
@@ -557,7 +567,7 @@ class VCF(object):
for key,value in self._header: stream.write("##%s=%s\n" % (key,value))
for var,label in [(self._info,"INFO"),(self._filter,"FILTER"),(self._format,"FORMAT")]:
for f in var.itervalues(): stream.write("##%s=%s\n" % (label,self.format_format(f,filter=(label=="FILTER"))))
-
+
def parse_heading( self, line ):
assert line.startswith('#')
@@ -572,7 +582,7 @@ class VCF(object):
if len(headings)<=i or headings[i] != s:
- if len(headings) <= i:
+ if len(headings) <= i:
err = "(%sth entry not found)" % (i+1)
else:
err = "(found %s, expected %s)" % (headings[i],s)
@@ -586,7 +596,7 @@ class VCF(object):
self._samples = headings[9:]
self._sample2column = dict( [(y,x+9) for x,y in enumerate( self._samples ) ] )
-
+
def write_heading( self, stream ):
stream.write("#" + "\t".join(self._required + self._samples) + "\n")
@@ -619,12 +629,12 @@ class VCF(object):
if f.type in ["Float","Integer"] and len(values)>0 and values[-1].find(';') > -1:
self.error(line,self.ERROR_TRAILING_DATA,values[-1])
values[-1] = values[-1].split(';')[0]
- if f.type == "Integer":
+ if f.type == "Integer":
for idx,v in enumerate(values):
try:
if v == ".": values[idx] = f.missingvalue
else: values[idx] = int(v)
- except:
+ except:
self.error(line,self.ERROR_FORMAT_NOT_INTEGER,"%s=%s" % (key, str(values)))
return [0] * len(values)
return values
@@ -633,7 +643,7 @@ class VCF(object):
if f.id == "GT": values = list(map( self.convertGT, values ))
return values
elif f.type == "Character":
- for v in values:
+ for v in values:
if len(v) != 1: self.error(line,self.ERROR_FORMAT_NOT_CHAR)
return values
elif f.type == "Float":
@@ -662,7 +672,7 @@ class VCF(object):
cols.append("")
else:
self.error(line,
- self.BAD_NUMBER_OF_COLUMNS,
+ self.BAD_NUMBER_OF_COLUMNS,
"expected %s for %s samples (%s), got %s" % (len(self._samples)+9, len(self._samples), self._samples, len(cols)))
chrom = cols[0]
@@ -677,7 +687,7 @@ class VCF(object):
# end of first-pass parse for sortedVCF
if lineparse: return chrom, pos, line
-
+
id = cols[2]
ref = cols[3].upper()
@@ -689,7 +699,7 @@ class VCF(object):
for c in ref:
if c not in "ACGTN": self.error(line,self.UNKNOWN_CHAR_IN_REF)
if "N" in ref: ref = get_sequence(chrom,pos,pos+len(ref),self._reference)
-
+
# make sure reference is sane
if self._reference:
left = max(0,pos-100)
@@ -703,7 +713,7 @@ class VCF(object):
else: alt = cols[4].upper().split(',')
if cols[5] == ".": qual = -1
- else:
+ else:
try: qual = float(cols[5])
except: self.error(line,self.QUAL_NOT_NUMERICAL)
@@ -719,9 +729,9 @@ class VCF(object):
if len(elts) == 1: v = None
elif len(elts) == 2: v = elts[1]
else: self.error(line,self.ERROR_INFO_STRING)
- info[elts[0]] = self.parse_formatdata(elts[0],
- v,
- self._info,
+ info[elts[0]] = self.parse_formatdata(elts[0],
+ v,
+ self._info,
line)
# Gracefully deal with absent FORMAT column
@@ -731,7 +741,7 @@ class VCF(object):
# check: all filters are defined
for f in filter:
if f not in self._filter: self.error(line,self.FILTER_NOT_DEFINED, f)
-
+
# check: format fields are defined
if self._format:
for f in format:
@@ -794,7 +804,7 @@ class VCF(object):
self.error(line,self.MISSING_INDEL_ALLELE_REF_BASE)
# trim trailing bases in alleles
- # AH: not certain why trimming this needs to be added
+ # AH: not certain why trimming this needs to be added
# disabled now for unit testing
# if alt:
# for i in range(1,min(len(ref),min(map(len,alt)))):
@@ -837,10 +847,10 @@ class VCF(object):
else:
if expected == -1: value = "."
else: value = ",".join(["."]*expected)
-
- dict[format[idx]] = self.parse_formatdata(format[idx],
- value,
- self._format,
+
+ dict[format[idx]] = self.parse_formatdata(format[idx],
+ value,
+ self._format,
line)
if expected != -1 and len(dict[format[idx]]) != expected:
self.error(line,self.BAD_NUMBER_OF_PARAMETERS,
@@ -861,7 +871,7 @@ class VCF(object):
'format':format}
for key,value in zip(self._samples,samples):
d[key] = value
-
+
return d
@@ -872,14 +882,14 @@ class VCF(object):
if data['alt'] == []: alt = "."
else: alt = ",".join(data['alt'])
if data['filter'] == None: filter = "."
- elif data['filter'] == []:
+ elif data['filter'] == []:
if self._version == 33: filter = "0"
else: filter = "PASS"
else: filter = ';'.join(data['filter'])
if data['qual'] == -1: qual = "."
else: qual = str(data['qual'])
- output = [data['chrom'],
+ output = [data['chrom'],
str(data['pos']+1), # change to 1-based position
data['id'],
data['ref'],
@@ -890,17 +900,17 @@ class VCF(object):
data['info'], self._info, separator=";"),
self.format_formatdata(
data['format'], self._format, value=False)]
-
+
for s in self._samples:
output.append(self.format_formatdata(
data[s], self._format, key=False))
-
+
stream.write( "\t".join(output) + "\n" )
def _parse_header(self, stream):
self._lineno = 0
for line in stream:
- line = ctabix._force_str(line, self.encoding)
+ line = force_str(line, self.encoding)
self._lineno += 1
if line.startswith('##'):
self.parse_header(line.strip())
@@ -1033,13 +1043,13 @@ class VCF(object):
self.encoding=encoding
self.tabixfile = pysam.Tabixfile(filename, encoding=encoding)
self._parse_header(self.tabixfile.header)
-
+
def fetch(self,
reference=None,
- start=None,
- end=None,
+ start=None,
+ end=None,
region=None ):
- """ Parse a stream of VCF-formatted lines.
+ """ Parse a stream of VCF-formatted lines.
Initializes class instance and return generator """
return self.tabixfile.fetch(
reference,
@@ -1053,7 +1063,7 @@ class VCF(object):
returns a validated record.
'''
-
+
raise NotImplementedError("needs to be checked")
chrom, pos = record.chrom, record.pos
@@ -1079,11 +1089,11 @@ class VCF(object):
faref = faref_leftflank[pos-left:]
if faref != ref: self.error(str(record),self.WRONG_REF,"(reference is %s, VCF says %s)" % (faref,ref))
ref = faref
-
+
# check: format fields are defined
for f in record.format:
if f not in self._format: self.error(str(record),self.FORMAT_NOT_DEFINED, f)
-
+
# check: all filters are defined
for f in record.filter:
if f not in self._filter: self.error(str(record),self.FILTER_NOT_DEFINED, f)
@@ -1136,7 +1146,7 @@ class VCF(object):
for allele in alt:
if not alleleRegEx.match(allele):
self.error(str(record),self.V40_BAD_ALLELE,allele)
-
+
# check for leading nucleotide in indel calls
for allele in alt:
@@ -1146,7 +1156,7 @@ class VCF(object):
self.error(str(record),self.MISSING_INDEL_ALLELE_REF_BASE)
# trim trailing bases in alleles
- # AH: not certain why trimming this needs to be added
+ # AH: not certain why trimming this needs to be added
# disabled now for unit testing
# for i in range(1,min(len(ref),min(map(len,alt)))):
# if len(set(allele[-1].upper() for allele in alt)) > 1 or ref[-1].upper() != alt[0][-1].upper():
@@ -1177,5 +1187,3 @@ class VCF(object):
__all__ = [
"VCF", "VCFRecord", ]
-
-
diff --git a/pysam/htslib_util.h b/pysam/htslib_util.h
index 1f9d491..46e44bc 100644
--- a/pysam/htslib_util.h
+++ b/pysam/htslib_util.h
@@ -36,9 +36,6 @@ typedef khash_t(s2i) s2i_t;
//////////////////////////////////////////////////////////////////
// various helper functions
//
-// fill pileup buffer for next position.
-
-int pysam_dispatch(int argc, char *argv[] );
/*!
@abstract Update the variable length data within a bam1_t entry
diff --git a/pysam/pysam_util.c b/pysam/pysam_util.c
index 9560ed0..f8ccae7 100644
--- a/pysam/pysam_util.c
+++ b/pysam/pysam_util.c
@@ -87,7 +87,7 @@ int pysam_dispatch(int argc, char *argv[] )
if (argc < 2) return 1;
int retval = 0;
-
+
if (strcmp(argv[1], "view") == 0) retval = main_samview(argc-1, argv+1);
else if (strcmp(argv[1], "import") == 0) retval = main_import(argc-1, argv+1);
else if (strcmp(argv[1], "mpileup") == 0) retval = bam_mpileup(argc-1, argv+1);
diff --git a/pysam/tabix_util.c b/pysam/tabix_util.c
index 89ffc23..f94b09d 100644
--- a/pysam/tabix_util.c
+++ b/pysam/tabix_util.c
@@ -1,6 +1,7 @@
// Definition of pysamerr
-#include "stdio.h"
-#include "unistd.h"
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
FILE * pysamerr = NULL;
#if !(_POSIX_C_SOURCE >= 200809L || _XOPEN_SOURCE >= 700)
diff --git a/pysam/version.py b/pysam/version.py
index 02c7d45..9047c04 100644
--- a/pysam/version.py
+++ b/pysam/version.py
@@ -1,6 +1,6 @@
# pysam versioning information
-__version__ = "0.8.3"
+__version__ = "0.8.4"
__samtools_version__ = "1.2"
diff --git a/requires.txt b/requirements.txt
similarity index 100%
rename from requires.txt
rename to requirements.txt
diff --git a/save/pysam_test2.6.py b/save/pysam_test2.6.py
index a59968c..eb4848a 100755
--- a/save/pysam_test2.6.py
+++ b/save/pysam_test2.6.py
@@ -355,7 +355,7 @@ class IOTest(unittest.TestCase):
If *use_template* is set, the header is copied from infile using the
template mechanism, otherwise target names and lengths are passed
- explicitely.
+ explicitly.
'''
diff --git a/setup.py b/setup.py
index 8c0a132..8009437 100644
--- a/setup.py
+++ b/setup.py
@@ -1,8 +1,21 @@
-#!/usr/bin/python
-'''
+#! /usr/bin/python
+
+'''The SAM/BAM/CRAM format is a way to efficiently store large numbers
+of alignments, such as those routinely created by next-generation
+sequencing methods.
+
+This module provides a low-level wrapper around the htslib C-API
+using cython and a high-level API for convenient access to the data in
+SAM/BAM formatted files. Also included is an interface to the samtools
+command line utilities and the tabix C-API for reading compressed and
+indexed tabular data.
-pysam
-*****
+The current version wraps htslib-1.2.1 and samtools-1.2.
+
+See:
+http://www.htslib.org
+https://github.com/pysam-developers/pysam
+http://pysam.readthedocs.org/en/stable
'''
@@ -239,50 +252,28 @@ if len(sys.argv) >= 2 and sys.argv[1] == "refresh":
from setuptools import Extension, setup
#######################################################
-#######################################################
+parts = ["samtools", "htslib", "tabix",
+ "faidx", "samfile", "utils",
+ "alignmentfile", "tabixproxies",
+ "vcf", "bcf"]
+
try:
from Cython.Distutils import build_ext
except ImportError:
# no Cython available - use existing C code
cmdclass = {}
- csamtools_sources = ["pysam/csamtools.c"]
- chtslib_sources = ["pysam/chtslib.c"]
- tabix_sources = ["pysam/ctabix.c"]
- faidx_sources = ["pysam/cfaidx.c"]
- csamfile_sources = ["pysam/csamfile.c"]
- calignmentfile_sources = ["pysam/calignmentfile.c"]
- tabproxies_sources = ["pysam/TabProxies.c"]
- cvcf_sources = ["pysam/cvcf.c"]
- cbcf_sources = ["pysam/cbcf.c"]
+ source_pattern = "pysam/c%s.c"
else:
# remove existing files to recompute
# necessary to be both compatible for python 2.7 and 3.3
if IS_PYTHON3:
- for f in ("pysam/csamtools.c",
- "pysam/chtslib.c",
- "pysam/ctabix.c",
- "pysam/cfaidx.c",
- "pysam/csamfile.c",
- "pysam/TabProxies.c",
- "pysam/cvcf.c",
- "pysam/bvcf.c",
- ):
+ for part in parts:
try:
- os.unlink(f)
+ os.unlink("pysam/c%s.c" % part)
except:
pass
-
+ source_pattern = "pysam/c%s.pyx"
cmdclass = {'build_ext': build_ext}
- csamtools_sources = ["pysam/csamtools.pyx"]
- chtslib_sources = ["pysam/chtslib.pyx"]
- csamfile_sources = ["pysam/csamfile.pyx"]
- calignmentfile_sources = ["pysam/calignmentfile.pyx"]
- tabix_sources = ["pysam/ctabix.pyx"]
- faidx_sources = ["pysam/cfaidx.pyx"]
- tabproxies_sources = ["pysam/TabProxies.pyx"]
- cvcf_sources = ["pysam/cvcf.pyx"]
- cbcf_sources = ["pysam/cbcf.pyx"]
-
#######################################################
classifiers = """
@@ -308,153 +299,176 @@ else:
os_c_files = []
#######################################################
-samtools = Extension(
+extra_compile_args = ["-Wno-error=declaration-after-statement",
+ "-DSAMTOOLS=1"]
+define_macros = [('_FILE_OFFSET_BITS', '64'),
+ ('_USE_KNETFILE', '')]
+
+csamtools = Extension(
"pysam.csamtools",
- csamtools_sources +
- ["pysam/%s" % x for x in (
- "pysam_util.c", )] +
+ [source_pattern % "samtools",
+ "pysam/pysam_util.c"] +
glob.glob(os.path.join("samtools", "*.pysam.c")) +
- os_c_files +
glob.glob(os.path.join("samtools", "*", "*.pysam.c")) +
+ os_c_files +
htslib_sources,
- library_dirs=[],
+ library_dirs=htslib_library_dirs,
include_dirs=["samtools", "pysam"] + include_os + htslib_include_dirs,
libraries=["z"] + htslib_libraries,
language="c",
- extra_compile_args=["-Wno-error=declaration-after-statement",
- "-DSAMTOOLS=1"],
- define_macros=[('_FILE_OFFSET_BITS', '64'),
- ('_USE_KNETFILE', '')]
+ extra_compile_args=extra_compile_args,
+ define_macros=define_macros
)
-htslib = Extension(
+chtslib = Extension(
"pysam.libchtslib",
- chtslib_sources +
- ["pysam/%s" % x for x in (
- "htslib_util.c", )] +
+ [source_pattern % "htslib",
+ "pysam/htslib_util.c"] +
shared_htslib_sources +
os_c_files,
library_dirs=htslib_library_dirs,
include_dirs=["pysam"] + include_os + htslib_include_dirs,
libraries=["z"] + htslib_libraries,
language="c",
- extra_compile_args=["-Wno-error=declaration-after-statement",
- "-DSAMTOOLS=1"],
- define_macros=[('_FILE_OFFSET_BITS', '64'),
- ('_USE_KNETFILE', '')]
+ extra_compile_args=extra_compile_args,
+ define_macros=define_macros
)
# samfile requires functions defined in bam_md.c
# for __advance_samtools method.
# Selected ones have been copied into samfile_utils.c
# Needs to be devolved somehow.
-samfile = Extension(
+csamfile = Extension(
"pysam.csamfile",
- csamfile_sources +
- ["pysam/%s" % x for x in (
- "htslib_util.c", "samfile_util.c",)] +
- ["samtools/kprobaln.c"] +
+ [source_pattern % "samfile",
+ "pysam/htslib_util.c",
+ "pysam/samfile_util.c",
+ "samtools/kprobaln.c"] +
htslib_sources +
os_c_files,
library_dirs=htslib_library_dirs,
include_dirs=["pysam", "samtools"] + include_os + htslib_include_dirs,
libraries=["z"] + htslib_libraries,
language="c",
- extra_compile_args=[
- "-Wno-error=declaration-after-statement",
- "-DSAMTOOLS=1"],
- define_macros=[('_FILE_OFFSET_BITS', '64'),
- ('_USE_KNETFILE', '')]
+ extra_compile_args=extra_compile_args,
+ define_macros=define_macros
)
# alignmentfile requires functions defined in bam_md.c
# for __advance_samtools method.
# Selected ones have been copied into samfile_utils.c
# Needs to be devolved somehow.
-alignmentfile = Extension(
+calignmentfile = Extension(
"pysam.calignmentfile",
- calignmentfile_sources +
- ["pysam/%s" % x for x in (
- "htslib_util.c", "samfile_util.c",)] +
- ["samtools/kprobaln.c"] +
+ [source_pattern % "alignmentfile",
+ "pysam/htslib_util.c",
+ "pysam/samfile_util.c",
+ "samtools/kprobaln.c"] +
htslib_sources +
os_c_files,
library_dirs=htslib_library_dirs,
include_dirs=["pysam", "samtools"] + include_os + htslib_include_dirs,
libraries=["z"] + htslib_libraries,
language="c",
- extra_compile_args=[
- "-Wno-error=declaration-after-statement",
- "-DSAMTOOLS=1"],
- define_macros=[('_FILE_OFFSET_BITS', '64'),
- ('_USE_KNETFILE', '')]
+ extra_compile_args=extra_compile_args,
+ define_macros=define_macros
+)
+
+# alignmentfile requires functions defined in bam_md.c
+# for __advance_samtools method.
+# Selected ones have been copied into samfile_utils.c
+# Needs to be devolved somehow.
+calignedsegment = Extension(
+ "pysam.calignedsegment",
+ [source_pattern % "alignedsegment",
+ "pysam/htslib_util.c",
+ "pysam/samfile_util.c",
+ "samtools/kprobaln.c"] +
+ htslib_sources +
+ os_c_files,
+ library_dirs=htslib_library_dirs,
+ include_dirs=["pysam", "samtools"] + include_os + htslib_include_dirs,
+ libraries=["z"] + htslib_libraries,
+ language="c",
+ extra_compile_args=extra_compile_args,
+ define_macros=define_macros
)
-tabix = Extension(
+ctabix = Extension(
"pysam.ctabix",
- tabix_sources +
- ["pysam/%s" % x for x in ("tabix_util.c", )] +
+ [source_pattern % "tabix",
+ "pysam/tabix_util.c"] +
htslib_sources +
os_c_files,
library_dirs=["pysam"] + htslib_library_dirs,
include_dirs=["pysam"] + include_os + htslib_include_dirs,
libraries=["z"] + htslib_libraries,
language="c",
- extra_compile_args=["-Wno-error=declaration-after-statement",
- "-DSAMTOOLS=1"],
- define_macros=[('_FILE_OFFSET_BITS', '64'),
- ('_USE_KNETFILE', '')],
+ extra_compile_args=extra_compile_args,
+ define_macros=define_macros
)
-faidx = Extension(
+cutils = Extension(
+ "pysam.cutils",
+ [source_pattern % "utils"] +
+ htslib_sources +
+ os_c_files,
+ library_dirs=["pysam"] + htslib_library_dirs,
+ include_dirs=["pysam"] + include_os + htslib_include_dirs,
+ libraries=["z"] + htslib_libraries,
+ language="c",
+ extra_compile_args=extra_compile_args,
+ define_macros=define_macros
+)
+
+cfaidx = Extension(
"pysam.cfaidx",
- faidx_sources +
+ [source_pattern % "faidx"] +
htslib_sources +
os_c_files,
- library_dirs=["pysam"],
+ library_dirs=["pysam"] + htslib_library_dirs,
include_dirs=["pysam"] + include_os + htslib_include_dirs,
libraries=["z"] + htslib_libraries,
language="c",
- extra_compile_args=["-Wno-error=declaration-after-statement",
- "-DSAMTOOLS=1"],
- define_macros=[('_FILE_OFFSET_BITS', '64'),
- ('_USE_KNETFILE', '')],
+ extra_compile_args=extra_compile_args,
+ define_macros=define_macros
)
-tabproxies = Extension(
- "pysam.TabProxies",
- tabproxies_sources + os_c_files,
+ctabixproxies = Extension(
+ "pysam.ctabixproxies",
+ [source_pattern % "tabixproxies"] +
+ os_c_files,
library_dirs=[],
include_dirs=include_os,
libraries=["z"],
language="c",
- extra_compile_args=["-Wno-error=declaration-after-statement"],
+ extra_compile_args=extra_compile_args,
+ define_macros=define_macros
)
cvcf = Extension(
"pysam.cvcf",
- cvcf_sources + os_c_files,
+ [source_pattern % "vcf"] +
+ os_c_files,
library_dirs=[],
include_dirs=["htslib"] + include_os + htslib_include_dirs,
libraries=["z"],
language="c",
- extra_compile_args=["-Wno-error=declaration-after-statement"],
+ extra_compile_args=extra_compile_args,
+ define_macros=define_macros
)
cbcf = Extension(
"pysam.cbcf",
- cbcf_sources +
+ [source_pattern % "bcf"] +
htslib_sources +
os_c_files,
library_dirs=htslib_library_dirs,
include_dirs=["htslib"] + include_os + htslib_include_dirs,
libraries=["z"] + htslib_libraries,
language="c",
- extra_compile_args=[
- "-Wno-error=declaration-after-statement",
- "-DSAMTOOLS=1"],
- define_macros=[('_FILE_OFFSET_BITS', '64'),
- ('_USE_KNETFILE', '')]
+ extra_compile_args=extra_compile_args,
+ define_macros=define_macros
)
metadata = {
@@ -475,15 +489,17 @@ metadata = {
# 'pysam.include.samtools.bcftools',
'pysam.include.samtools.win32'],
'requires': ['cython (>=0.21)'],
- 'ext_modules': [samtools,
- htslib,
- samfile,
- alignmentfile,
- tabix,
- tabproxies,
+ 'ext_modules': [csamtools,
+ chtslib,
+ csamfile,
+ calignmentfile,
+ calignedsegment,
+ ctabix,
+ ctabixproxies,
cvcf,
cbcf,
- faidx],
+ cfaidx,
+ cutils],
'cmdclass': cmdclass,
'package_dir': {'pysam': 'pysam',
'pysam.include.htslib': 'htslib',
diff --git a/tests/AlignedSegment_test.py b/tests/AlignedSegment_test.py
index 2f096c2..a42a6cb 100644
--- a/tests/AlignedSegment_test.py
+++ b/tests/AlignedSegment_test.py
@@ -2,6 +2,7 @@ import os
import pysam
import unittest
from TestUtils import checkFieldEqual
+import copy
SAMTOOLS = "samtools"
WORKDIR = "pysam_test_work"
@@ -24,7 +25,7 @@ class ReadTest(unittest.TestCase):
a.next_reference_id = 0
a.next_reference_start = 200
a.template_length = 167
- a.query_qualities = pysam.fromQualityString("1234") * 10
+ a.query_qualities = pysam.qualitystring_to_array("1234") * 10
# todo: create tags
return a
@@ -39,7 +40,7 @@ class TestAlignedSegment(ReadTest):
a = pysam.AlignedSegment()
self.assertEqual(a.query_name, None)
self.assertEqual(a.query_sequence, None)
- self.assertEqual(pysam.toQualityString(a.query_qualities), None)
+ self.assertEqual(pysam.qualities_to_qualitystring(a.query_qualities), None)
self.assertEqual(a.flag, 0)
self.assertEqual(a.reference_id, 0)
self.assertEqual(a.mapping_quality, 0)
@@ -148,14 +149,14 @@ class TestAlignedSegment(ReadTest):
'''
a = self.buildRead()
a.query_sequence = a.query_sequence[5:10]
- self.assertEqual(pysam.toQualityString(a.query_qualities), None)
+ self.assertEqual(pysam.qualities_to_qualitystring(a.query_qualities), None)
a = self.buildRead()
- s = pysam.toQualityString(a.query_qualities)
+ s = pysam.qualities_to_qualitystring(a.query_qualities)
a.query_sequence = a.query_sequence[5:10]
- a.query_qualities = pysam.fromQualityString(s[5:10])
+ a.query_qualities = pysam.qualitystring_to_array(s[5:10])
- self.assertEqual(pysam.toQualityString(a.query_qualities), s[5:10])
+ self.assertEqual(pysam.qualities_to_qualitystring(a.query_qualities), s[5:10])
def testLargeRead(self):
'''build an example read.'''
@@ -171,7 +172,7 @@ class TestAlignedSegment(ReadTest):
a.next_reference_id = 0
a.next_reference_start = 200
a.template_length = 167
- a.query_qualities = pysam.fromQualityString("1234") * 200
+ a.query_qualities = pysam.qualitystring_to_array("1234") * 200
return a
@@ -227,15 +228,8 @@ class TestAlignedSegment(ReadTest):
[(20, 30), (31, 40), (40, 60)])
def test_get_aligned_pairs_soft_clipping(self):
- a = pysam.AlignedSegment()
- a.query_name = "read_12345"
- a.query_sequence = "ACGT" * 10
- a.flag = 0
- a.reference_id = 0
- a.reference_start = 20
- a.mapping_quality = 20
+ a = self.buildRead()
a.cigartuples = ((4, 2), (0, 35), (4, 3))
- a.query_qualities = pysam.fromQualityString("1234") * 10
self.assertEqual(a.get_aligned_pairs(),
[(0, None), (1, None)] +
[(qpos, refpos) for (qpos, refpos) in zip(
@@ -250,15 +244,8 @@ class TestAlignedSegment(ReadTest):
)
def test_get_aligned_pairs_hard_clipping(self):
- a = pysam.AlignedSegment()
- a.query_name = "read_12345"
- a.query_sequence = "ACGT" * 10
- a.flag = 0
- a.reference_id = 0
- a.reference_start = 20
- a.mapping_quality = 20
+ a = self.buildRead()
a.cigartuples = ((5, 2), (0, 35), (5, 3))
- a.query_qualities = pysam.fromQualityString("1234") * 10
self.assertEqual(a.get_aligned_pairs(),
# No seq, no seq pos
[(qpos, refpos) for (qpos, refpos) in zip(
@@ -268,15 +255,8 @@ class TestAlignedSegment(ReadTest):
range(0, 0 + 35), range(20, 20 + 35))])
def test_get_aligned_pairs_skip(self):
- a = pysam.AlignedSegment()
- a.query_name = "read_12345"
- a.query_sequence = "ACGT" * 10
- a.flag = 0
- a.reference_id = 0
- a.reference_start = 20
- a.mapping_quality = 20
- a.cigartuples = ((0, 2), (3, 100), (0, 38))
- a.query_qualities = pysam.fromQualityString("1234") * 10
+ a = self.buildRead()
+ a.cigarstring = "2M100D38M"
self.assertEqual(a.get_aligned_pairs(),
[(0, 20), (1, 21)] +
[(None, refpos) for refpos in range(22, 22 + 100)] +
@@ -291,15 +271,8 @@ class TestAlignedSegment(ReadTest):
range(20 + 2 + 100, 20 + 2 + 100 + 38))])
def test_get_aligned_pairs_match_mismatch(self):
- a = pysam.AlignedSegment()
- a.query_name = "read_12345"
- a.query_sequence = "ACGT" * 10
- a.flag = 0
- a.reference_id = 0
- a.reference_start = 20
- a.mapping_quality = 20
+ a = self.buildRead()
a.cigartuples = ((7, 20), (8, 20))
- a.query_qualities = pysam.fromQualityString("1234") * 10
self.assertEqual(a.get_aligned_pairs(),
[(qpos, refpos) for (qpos, refpos) in zip(
range(0, 0 + 40), range(20, 20 + 40))])
@@ -308,21 +281,68 @@ class TestAlignedSegment(ReadTest):
range(0, 0 + 40), range(20, 20 + 40))])
def test_get_aligned_pairs_padding(self):
- a = pysam.AlignedSegment()
- a.query_name = "read_12345"
- a.query_sequence = "ACGT" * 10
- a.flag = 0
- a.reference_id = 0
- a.reference_start = 20
- a.mapping_quality = 20
+ a = self.buildRead()
a.cigartuples = ((7, 20), (6, 1), (8, 19))
- a.query_qualities = pysam.fromQualityString("1234") * 10
def inner():
a.get_aligned_pairs()
# padding is not being handled right now
self.assertRaises(NotImplementedError, inner)
+ def test_get_aligned_pairs(self):
+ a = self.buildRead()
+ a.query_sequence = "A" * 9
+ a.cigarstring = "9M"
+ a.set_tag("MD", "9")
+ self.assertEqual(
+ a.get_aligned_pairs(with_seq=True),
+ [(0, 20, 'A'), (1, 21, 'A'), (2, 22, 'A'),
+ (3, 23, 'A'), (4, 24, 'A'), (5, 25, 'A'),
+ (6, 26, 'A'), (7, 27, 'A'), (8, 28, 'A')])
+
+ a.set_tag("MD", "4C4")
+ self.assertEqual(
+ a.get_aligned_pairs(with_seq=True),
+ [(0, 20, 'A'), (1, 21, 'A'), (2, 22, 'A'),
+ (3, 23, 'A'), (4, 24, 'c'), (5, 25, 'A'),
+ (6, 26, 'A'), (7, 27, 'A'), (8, 28, 'A')])
+
+ a.cigarstring = "5M2D4M"
+ a.set_tag("MD", "4C^TT4")
+ self.assertEqual(
+ a.get_aligned_pairs(with_seq=True),
+ [(0, 20, 'A'), (1, 21, 'A'), (2, 22, 'A'),
+ (3, 23, 'A'), (4, 24, 'c'),
+ (None, 25, 'T'), (None, 26, 'T'),
+ (5, 27, 'A'), (6, 28, 'A'), (7, 29, 'A'), (8, 30, 'A')]
+ )
+
+ a.cigarstring = "5M2D2I2M"
+ a.set_tag("MD", "4C^TT2")
+ self.assertEqual(
+ a.get_aligned_pairs(with_seq=True),
+ [(0, 20, 'A'), (1, 21, 'A'), (2, 22, 'A'),
+ (3, 23, 'A'), (4, 24, 'c'),
+ (None, 25, 'T'), (None, 26, 'T'),
+ (5, None, None), (6, None, None),
+ (7, 27, 'A'), (8, 28, 'A')]
+ )
+
+ def testNoSequence(self):
+ '''issue 176: retrieving length without query sequence
+ with soft-clipping.
+ '''
+ a = self.buildRead()
+ a.query_sequence = None
+ a.cigarstring = "20M"
+ self.assertEqual(a.query_alignment_length, 20)
+ a.cigarstring = "20M1S"
+ self.assertEqual(a.query_alignment_length, 20)
+ a.cigarstring = "1S20M"
+ self.assertEqual(a.query_alignment_length, 20)
+ a.cigarstring = "1S20M1S"
+ self.assertEqual(a.query_alignment_length, 20)
+
class TestTags(ReadTest):
@@ -443,5 +463,125 @@ class TestTags(ReadTest):
after = entry.get_tags()
self.assertEqual(after, before)
+ def testMDTag(self):
+ a = self.buildRead()
+
+ # Substitutions only
+ a.cigarstring = "21M"
+ a.query_sequence = "A" * 21
+ a.set_tag('MD', "5C0T0G05C0G0T5")
+ self.assertEqual(
+ "AAAAActgAAAAAcgtAAAAA",
+ a.get_reference_sequence())
+
+ a.cigarstring = "21M"
+ a.query_sequence = "A" * 21
+ a.set_tag('MD', "5CTG5CGT5")
+ self.assertEqual(
+ "AAAAActgAAAAAcgtAAAAA",
+ a.get_reference_sequence())
+
+ a.cigarstring = "11M"
+ a.query_sequence = "A" * 11
+ a.set_tag('MD', "CTG5CGT")
+ self.assertEqual(
+ "ctgAAAAAcgt",
+ a.get_reference_sequence())
+
+ # insertions are silent
+ a.cigarstring = "5M1I5M"
+ a.query_sequence = "A" * 5 + "C" + "A" * 5
+ a.set_tag('MD', "11")
+ self.assertEqual(
+ a.query_sequence,
+ a.get_reference_sequence())
+
+ a.cigarstring = "1I10M"
+ self.assertEqual(
+ a.query_sequence,
+ a.get_reference_sequence())
+
+ a.cigarstring = "10M1I"
+ self.assertEqual(
+ a.query_sequence,
+ a.get_reference_sequence())
+
+ a.cigarstring = "5M1D5M"
+ a.query_sequence = "A" * 10
+ a.set_tag('MD', "5^C5")
+ self.assertEqual(
+ "A" * 5 + "C" + "A" * 5,
+ a.get_reference_sequence())
+
+ a.cigarstring = "5M1D5M"
+ a.query_sequence = "A" * 10
+ a.set_tag('MD', "5^CCC5")
+ self.assertEqual(
+ "A" * 5 + "C" * 3 + "A" * 5,
+ a.get_reference_sequence())
+
+ # softclipping
+ a.cigarstring = "5S5M1D5M5S"
+ a.query_sequence = "G" * 5 + "A" * 10 + "G" * 5
+ a.set_tag('MD', "10")
+ self.assertEqual(
+ "A" * 10,
+ a.get_reference_sequence())
+
+ # all together
+ a.cigarstring = "5S5M1D5M1I5M5S"
+ a.query_sequence = "G" * 5 + "A" * 16 + "G" * 5
+ a.set_tag('MD', "2C2^T10")
+ self.assertEqual(
+ "AAcAATAAAAAAAAAA",
+ a.get_reference_sequence())
+
+ # all together
+ a.cigarstring = "5S5M1D2I5M5S"
+ a.query_sequence = "G" * 5 + "A" * 11 + "G" * 5
+ a.set_tag('MD', "2C2^TC5")
+ self.assertEqual(
+ "AAcAATCAAAAA",
+ a.get_reference_sequence())
+
+
+class TestCopy(ReadTest):
+
+ def testCopy(self):
+ a = self.buildRead()
+ b = copy.copy(a)
+ # check if a and b are the same
+ self.assertEqual(a, b)
+
+ # check if they map to different objects
+ a.query_name = 'ReadA'
+ b.query_name = 'ReadB'
+ self.assertEqual(a.query_name, 'ReadA')
+ self.assertEqual(b.query_name, 'ReadB')
+
+ def testDeepCopy(self):
+ a = self.buildRead()
+ b = copy.deepcopy(a)
+ # check if a and b are the same
+ self.assertEqual(a, b)
+
+ # check if they map to different objects
+ a.query_name = 'ReadA'
+ b.query_name = 'ReadB'
+ self.assertEqual(a.query_name, 'ReadA')
+ self.assertEqual(b.query_name, 'ReadB')
+
+
+class TestAsString(unittest.TestCase):
+
+ def testAsString(self):
+ with open(os.path.join(DATADIR, "ex2.sam")) as samf:
+ reference = [x for x in samf if not x.startswith("@")]
+
+ with pysam.AlignmentFile(
+ os.path.join(DATADIR, "ex2.bam"), "r") as pysamf:
+ for s, p in zip(reference, pysamf):
+ self.assertEqual(s, p.tostring(pysamf))
+
if __name__ == "__main__":
unittest.main()
diff --git a/tests/AlignmentFile_test.py b/tests/AlignmentFile_test.py
index 751fad0..30fed5b 100644
--- a/tests/AlignmentFile_test.py
+++ b/tests/AlignmentFile_test.py
@@ -166,12 +166,12 @@ class BasicTestBAMFromFetch(unittest.TestCase):
self.reads[3].query_sequence, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG"))
def testARqual(self):
- self.assertEqual(pysam.toQualityString(self.reads[0].query_qualities), "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<",
- "quality string mismatch in read 1: %s != %s" % (pysam.toQualityString(self.reads[0].query_qualities), "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<"))
- self.assertEqual(pysam.toQualityString(self.reads[1].query_qualities), "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<", "quality string mismatch in read 2: %s != %s" % (
- pysam.toQualityString(self.reads[1].query_qualities), "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<"))
- self.assertEqual(pysam.toQualityString(self.reads[3].query_qualities), "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<",
- "quality string mismatch in read 3: %s != %s" % (pysam.toQualityString(self.reads[3].query_qualities), "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<"))
+ self.assertEqual(pysam.qualities_to_qualitystring(self.reads[0].query_qualities), "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<",
+ "quality string mismatch in read 1: %s != %s" % (pysam.qualities_to_qualitystring(self.reads[0].query_qualities), "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<"))
+ self.assertEqual(pysam.qualities_to_qualitystring(self.reads[1].query_qualities), "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<", "quality string mismatch in read 2: %s != %s" % (
+ pysam.qualities_to_qualitystring(self.reads[1].query_qualities), "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<"))
+ self.assertEqual(pysam.qualities_to_qualitystring(self.reads[3].query_qualities), "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<",
+ "quality string mismatch in read 3: %s != %s" % (pysam.qualities_to_qualitystring(self.reads[3].query_qualities), "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<"))
def testARquery(self):
self.assertEqual(
@@ -195,22 +195,22 @@ class BasicTestBAMFromFetch(unittest.TestCase):
def testARqqual(self):
self.assertEqual(
- pysam.toQualityString(self.reads[0].query_alignment_qualities),
+ pysam.qualities_to_qualitystring(self.reads[0].query_alignment_qualities),
"<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<",
"qquality string mismatch in read 1: %s != %s" %
- (pysam.toQualityString(self.reads[0].query_alignment_qualities),
+ (pysam.qualities_to_qualitystring(self.reads[0].query_alignment_qualities),
"<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<"))
self.assertEqual(
- pysam.toQualityString(self.reads[1].query_alignment_qualities),
+ pysam.qualities_to_qualitystring(self.reads[1].query_alignment_qualities),
"<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<",
"qquality string mismatch in read 2: %s != %s" %
- (pysam.toQualityString(self.reads[1].query_alignment_qualities),
+ (pysam.qualities_to_qualitystring(self.reads[1].query_alignment_qualities),
"<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<"))
self.assertEqual(
- pysam.toQualityString(self.reads[3].query_alignment_qualities),
+ pysam.qualities_to_qualitystring(self.reads[3].query_alignment_qualities),
"<<<<<<<<<<<<<<<<<:<9/,&,22",
"qquality string mismatch in read 3: %s != %s" %
- (pysam.toQualityString(self.reads[3].query_alignment_qualities),
+ (pysam.qualities_to_qualitystring(self.reads[3].query_alignment_qualities),
"<<<<<<<<<<<<<<<<<:<9/,&,22"))
def testPresentOptionalFields(self):
@@ -312,7 +312,7 @@ class BasicTestCRAMFromFetch(BasicTestBAMFromFetch):
(self.reads[1].opt('MF'), 18))
-class BasicTestSAMFromFile(BasicTestBAMFromFetch):
+class BasicTestSAMFromFilename(BasicTestBAMFromFetch):
def setUp(self):
self.samfile = pysam.AlignmentFile(
@@ -321,7 +321,7 @@ class BasicTestSAMFromFile(BasicTestBAMFromFetch):
self.reads = [r for r in self.samfile]
-class BasicTestCRAMFromFile(BasicTestCRAMFromFetch):
+class BasicTestCRAMFromFilename(BasicTestCRAMFromFetch):
def setUp(self):
self.samfile = pysam.AlignmentFile(
@@ -330,7 +330,7 @@ class BasicTestCRAMFromFile(BasicTestCRAMFromFetch):
self.reads = [r for r in self.samfile]
-class BasicTestBAMFromFile(BasicTestBAMFromFetch):
+class BasicTestBAMFromFilename(BasicTestBAMFromFetch):
def setUp(self):
self.samfile = pysam.AlignmentFile(
@@ -339,6 +339,33 @@ class BasicTestBAMFromFile(BasicTestBAMFromFetch):
self.reads = [r for r in self.samfile]
+class BasicTestBAMFromFile(BasicTestBAMFromFetch):
+
+ def setUp(self):
+ f = open(os.path.join(DATADIR, "ex3.bam"))
+ self.samfile = pysam.AlignmentFile(
+ f, "rb")
+ self.reads = [r for r in self.samfile]
+
+
+class BasicTestSAMFromFile(BasicTestBAMFromFetch):
+
+ def setUp(self):
+ f = open(os.path.join(DATADIR, "ex3.sam"))
+ self.samfile = pysam.AlignmentFile(
+ f, "r")
+ self.reads = [r for r in self.samfile]
+
+
+class BasicTestCRAMFromFile(BasicTestCRAMFromFetch):
+
+ def setUp(self):
+ f = open(os.path.join(DATADIR, "ex3.cram"))
+ self.samfile = pysam.AlignmentFile(
+ f, "rc")
+ self.reads = [r for r in self.samfile]
+
+
##################################################
#
# Test of basic File I/O
@@ -370,7 +397,7 @@ class TestIO(unittest.TestCase):
If *use_template* is set, the header is copied from infile
using the template mechanism, otherwise target names and
- lengths are passed explicitely.
+ lengths are passed explicitly.
The *checkf* is used to determine if the files are
equal.
@@ -619,6 +646,25 @@ class TestIO(unittest.TestCase):
samfile.close()
self.assertRaises(ValueError, samfile.fetch, 'chr1', 100, 120)
+ def testFetchFromClosedFileObject(self):
+
+ f = open(os.path.join(DATADIR, "ex1.bam"))
+ samfile = pysam.AlignmentFile(f, "rb")
+ f.close()
+ self.assertTrue(f.closed)
+ # access to Samfile should still work
+ self.checkEcho("ex1.bam",
+ "ex1.bam",
+ "tmp_ex1.bam",
+ "rb", "wb")
+
+ f = open(os.path.join(DATADIR, "ex1.bam"))
+ samfile = pysam.AlignmentFile(f, "rb")
+ self.assertFalse(f.closed)
+ samfile.close()
+ # python file needs to be closed separately
+ self.assertFalse(f.closed)
+
def testClosedFile(self):
'''test that access to a closed samfile raises ValueError.'''
@@ -763,10 +809,10 @@ class TestIteratorRowBAM(unittest.TestCase):
str(a), str(d)))
qual = d[10]
self.assertEqual(
- pysam.toQualityString(a.query_qualities),
+ pysam.qualities_to_qualitystring(a.query_qualities),
qual,
"line %i: quality mismatch: %s != %s, \n%s\n%s\n" %
- (line, pysam.toQualityString(a.query_qualities), qual,
+ (line, pysam.qualities_to_qualitystring(a.query_qualities), qual,
str(a), str(d)))
def testIteratePerContig(self):
@@ -1013,7 +1059,7 @@ class TestTagParsing(unittest.TestCase):
a.next_reference_id = 0
a.next_reference_start = 200
a.template_length = 0
- a.query_qualities = pysam.fromQualityString("1234") * 3
+ a.query_qualities = pysam.qualitystring_to_array("1234") * 3
# todo: create tags
return a
@@ -1074,7 +1120,7 @@ class TestTagParsing(unittest.TestCase):
def c(r, l):
r.tags = [('ZM', l)]
- self.assertEqual(r.opt("ZM"), list(l))
+ self.assertEqual(list(r.opt("ZM")), list(l))
# signed integers
c(r, (-1, 1))
@@ -1113,10 +1159,10 @@ class TestClipping(unittest.TestCase):
if read.query_name == "r001":
self.assertEqual(read.query_sequence, 'AAAAGATAAGGATA')
self.assertEqual(read.query_alignment_sequence, 'AGATAAGGATA')
- self.assertEqual(pysam.toQualityString(read.query_qualities),
+ self.assertEqual(pysam.qualities_to_qualitystring(read.query_qualities),
None)
self.assertEqual(
- pysam.toQualityString(read.query_alignment_qualities),
+ pysam.qualities_to_qualitystring(read.query_alignment_qualities),
None)
elif read.query_name == "r002":
@@ -1124,10 +1170,10 @@ class TestClipping(unittest.TestCase):
self.assertEqual(read.query_sequence, 'GCCTAAGCTAA')
self.assertEqual(read.query_alignment_sequence, 'AGCTAA')
self.assertEqual(
- pysam.toQualityString(read.query_qualities),
+ pysam.qualities_to_qualitystring(read.query_qualities),
'01234567890')
self.assertEqual(
- pysam.toQualityString(read.query_alignment_qualities),
+ pysam.qualities_to_qualitystring(read.query_alignment_qualities),
'567890')
elif read.query_name == "r003":
@@ -1135,10 +1181,10 @@ class TestClipping(unittest.TestCase):
self.assertEqual(read.query_sequence, 'GCCTAAGCTAA')
self.assertEqual(read.query_alignment_sequence, 'GCCTAA')
self.assertEqual(
- pysam.toQualityString(read.query_qualities),
+ pysam.qualities_to_qualitystring(read.query_qualities),
'01234567890')
self.assertEqual(
- pysam.toQualityString(read.query_alignment_qualities),
+ pysam.qualities_to_qualitystring(read.query_alignment_qualities),
'012345')
elif read.query_name == "r004":
@@ -1146,10 +1192,10 @@ class TestClipping(unittest.TestCase):
self.assertEqual(read.query_sequence, 'TAGGC')
self.assertEqual(read.query_alignment_sequence, 'TAGGC')
self.assertEqual(
- pysam.toQualityString(read.query_qualities),
+ pysam.qualities_to_qualitystring(read.query_qualities),
'01234')
self.assertEqual(
- pysam.toQualityString(read.query_alignment_qualities),
+ pysam.qualities_to_qualitystring(read.query_alignment_qualities),
'01234')
@@ -1347,7 +1393,7 @@ class TestContextManager(unittest.TestCase):
with pysam.AlignmentFile(os.path.join(DATADIR, 'ex1.bam'),
'rb') as samfile:
samfile.fetch()
- self.assertEqual(samfile._isOpen(), False)
+ self.assertEqual(samfile.closed, True)
class TestExceptions(unittest.TestCase):
@@ -1482,7 +1528,7 @@ class TestDeNovoConstruction(unittest.TestCase):
a.next_reference_id = 0
a.next_reference_start = 199
a.template_length = 167
- a.query_qualities = pysam.fromQualityString(
+ a.query_qualities = pysam.qualitystring_to_array(
"<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<")
a.tags = (("NM", 1),
("RG", "L1"))
@@ -1498,7 +1544,7 @@ class TestDeNovoConstruction(unittest.TestCase):
b.next_reference_id = 1
b.next_reference_start = 499
b.template_length = 412
- b.query_qualities = pysam.fromQualityString(
+ b.query_qualities = pysam.qualitystring_to_array(
"<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<")
b.tags = (("MF", 18),
("RG", "L2"))
@@ -1625,12 +1671,35 @@ class TestTruncatedBAM(unittest.TestCase):
iterall = lambda x: len([a for a in x])
self.assertRaises(IOError, iterall, s.fetch())
+COMPARE_BTAG = [100, 1, 91, 0, 7, 101, 0, 201, 96, 204,
+ 0, 0, 87, 109, 0, 7, 97, 112, 1, 12, 78,
+ 197, 0, 7, 100, 95, 101, 202, 0, 6, 0, 1,
+ 186, 0, 84, 0, 244, 0, 0, 324, 0, 107, 195,
+ 101, 113, 0, 102, 0, 104, 3, 0, 101, 1, 0,
+ 212, 6, 0, 0, 1, 0, 74, 1, 11, 0, 196, 2,
+ 197, 103, 0, 108, 98, 2, 7, 0, 1, 2, 194,
+ 0, 180, 0, 108, 0, 203, 104, 16, 5, 205,
+ 0, 0, 0, 1, 1, 100, 98, 0, 0, 204, 6, 0,
+ 79, 0, 0, 101, 7, 109, 90, 265, 1, 27, 10,
+ 109, 102, 9, 0, 292, 0, 110, 0, 0, 102,
+ 112, 0, 0, 84, 100, 103, 2, 81, 126, 0, 2,
+ 90, 0, 15, 96, 15, 1, 0, 2, 0, 107, 92, 0,
+ 0, 101, 3, 98, 15, 102, 13, 116, 116, 90, 93,
+ 198, 0, 0, 0, 199, 92, 26, 495, 100, 5, 0,
+ 100, 5, 209, 0, 92, 107, 90, 0, 0, 0, 0, 109,
+ 194, 7, 94, 200, 0, 40, 197, 0, 11, 0, 0, 112,
+ 110, 6, 4, 200, 28, 0, 196, 0, 203, 1, 129,
+ 0, 0, 1, 0, 94, 0, 1, 0, 107, 5, 201, 3, 3, 100,
+ 0, 121, 0, 7, 0, 1, 105, 306, 3, 86, 8, 183, 0,
+ 12, 163, 17, 83, 22, 0, 0, 1, 8, 109, 103, 0, 0,
+ 295, 0, 200, 16, 172, 3, 16, 182, 3, 11, 0, 0,
+ 223, 111, 103, 0, 5, 225, 0, 95]
class TestBTagSam(unittest.TestCase):
'''see issue 81.'''
- compare = [[100, 1, 91, 0, 7, 101, 0, 201, 96, 204, 0, 0, 87, 109, 0, 7, 97, 112, 1, 12, 78, 197, 0, 7, 100, 95, 101, 202, 0, 6, 0, 1, 186, 0, 84, 0, 244, 0, 0, 324, 0, 107, 195, 101, 113, 0, 102, 0, 104, 3, 0, 101, 1, 0, 212, 6, 0, 0, 1, 0, 74, 1, 11, 0, 196, 2, 197, 103, 0, 108, 98, 2, 7, 0, 1, 2, 194, 0, 180, 0, 108, 0, 203, 104, 16, 5, 205, 0, 0, 0, 1, 1, 100, 98, 0, 0, 204, 6, 0, 79, 0, 0, 101, 7, 109, 90, 265, 1, 27, 10, 109, 102, 9, 0, 292, 0, 110, 0, 0, 102, 112, 0, 0, 84, 10 [...]
+ compare = [COMPARE_BTAG,
[-100, 200, -300, -400],
[-100, 12],
[12, 15],
@@ -1638,26 +1707,47 @@ class TestBTagSam(unittest.TestCase):
filename = os.path.join(DATADIR, 'example_btag.sam')
- def testRead(self):
+ read0 = [('RG', 'QW85I'),
+ ('PG', 'tmap'),
+ ('MD', '140'),
+ ('NM', 0),
+ ('AS', 140),
+ ('FZ', array.array('H', COMPARE_BTAG)),
+ ('XA', 'map2-1'),
+ ('XS', 53),
+ ('XT', 38),
+ ('XF', 1),
+ ('XE', 0)]
+
+ def testReadTags(self):
s = pysam.AlignmentFile(self.filename)
for x, read in enumerate(s):
+ tags = read.tags
if x == 0:
- self.assertEqual(read.tags, [('RG', 'QW85I'), ('PG', 'tmap'), ('MD', '140'), ('NM', 0), ('AS', 140), ('FZ', [100, 1, 91, 0, 7, 101, 0, 201, 96, 204, 0, 0, 87, 109, 0, 7, 97, 112, 1, 12, 78, 197, 0, 7, 100, 95, 101, 202, 0, 6, 0, 1, 186, 0, 84, 0, 244, 0, 0, 324, 0, 107, 195, 101, 113, 0, 102, 0, 104, 3, 0, 101, 1, 0, 212, 6, 0, 0, 1, 0, 74, 1, 11, 0, 196, 2, 197, 103, 0, 108, 98, 2, 7, 0, 1, 2, 194, 0, 180, 0, 108, 0, 203, 104, 16, 5, 205, 0, 0, 0, 1, 1, 100, 98, 0, 0, 20 [...]
- )
-
- fz = dict(read.tags)["FZ"]
+ self.assertEqual(tags, self.read0)
+
+ fz = list(dict(tags)["FZ"])
self.assertEqual(fz, self.compare[x])
- self.assertEqual(read.opt("FZ"), self.compare[x])
-
- def testWrite(self):
+ self.assertEqual(list(read.opt("FZ")), self.compare[x])
+ self.assertEqual(tags, read.get_tags())
+ for tag, value in tags:
+ self.assertEqual(value, read.get_tag(tag))
+
+ def testReadWriteTags(self):
s = pysam.AlignmentFile(self.filename)
for read in s:
before = read.tags
- read.tags = read.tags
- after = read.tags
- self.assertEqual(after, before)
+ read.tags = before
+ self.assertEqual(read.tags, before)
+
+ read.set_tags(before)
+ self.assertEqual(read.tags, before)
+
+ for tag, value in before:
+ read.set_tag(tag, value)
+ self.assertEqual(value, read.get_tag(tag))
class TestBTagBam(TestBTagSam):
@@ -1861,18 +1951,50 @@ class TestPileup(unittest.TestCase):
fastafile=self.fastafile)
self.checkEqual(refs, iterator)
- def count_coverage_python(self, bam, chr, start, stop, read_callback, quality_threshold=15):
+
+class TestCountCoverage(unittest.TestCase):
+
+ samfilename = "pysam_data/ex1.bam"
+ fastafilename = "pysam_data/ex1.fa"
+
+ def setUp(self):
+
+ self.samfile = pysam.AlignmentFile(self.samfilename)
+ self.fastafile = pysam.Fastafile(self.fastafilename)
+
+ samfile = pysam.AlignmentFile(
+ "test_count_coverage_read_all.bam", 'wb',
+ template=self.samfile)
+ for ii, read in enumerate(self.samfile.fetch()):
+ # if ii % 2 == 0: # setting BFUNMAP makes no sense...
+ #read.flag = read.flag | 0x4
+ if ii % 3 == 0:
+ read.flag = read.flag | 0x100
+ if ii % 5 == 0:
+ read.flag = read.flag | 0x200
+ if ii % 7 == 0:
+ read.flag = read.flag | 0x400
+ samfile.write(read)
+ samfile.close()
+ pysam.index("test_count_coverage_read_all.bam")
+
+ def count_coverage_python(self, bam, chrom, start, stop,
+ read_callback,
+ quality_threshold=15):
l = stop - start
count_a = array.array('L', [0] * l)
count_c = array.array('L', [0] * l)
count_g = array.array('L', [0] * l)
count_t = array.array('L', [0] * l)
- for p in bam.pileup(chr, start, stop, truncate=True, stepper='nofilter'):
+ for p in bam.pileup(chrom, start, stop, truncate=True,
+ stepper='nofilter'):
rpos = p.reference_pos - start
for read in p.pileups:
- if not read.is_del and not read.is_refskip and read_callback(read.alignment):
+ if not read.is_del and not read.is_refskip and \
+ read_callback(read.alignment):
try:
- if read.alignment.query_qualities[read.query_position] > quality_threshold:
+ if read.alignment.query_qualities[read.query_position] \
+ >= quality_threshold:
letter = read.alignment.query[read.query_position]
if letter == 'A':
count_a[rpos] += 1
@@ -1887,51 +2009,59 @@ class TestPileup(unittest.TestCase):
return count_a, count_c, count_g, count_t
def test_count_coverage(self):
- chr = 'chr1'
+ chrom = 'chr1'
start = 0
stop = 2000
- manual_counts = self.count_coverage_python(self.samfile, chr, start, stop,
- lambda read: True,
- quality_threshold=0)
- fast_counts = self.samfile.count_coverage(chr, start, stop,
- read_callback=lambda read: True,
- quality_threshold=0)
- self.assertEqual(fast_counts[0], manual_counts[0])
- self.assertEqual(fast_counts[1], manual_counts[1])
- self.assertEqual(fast_counts[2], manual_counts[2])
- self.assertEqual(fast_counts[3], manual_counts[3])
+ manual_counts = self.count_coverage_python(
+ self.samfile, chrom, start, stop,
+ lambda read: True,
+ quality_threshold=0)
+ fast_counts = self.samfile.count_coverage(
+ chrom, start, stop,
+ read_callback=lambda read: True,
+ quality_threshold=0)
+
+ self.assertEqual(list(fast_counts[0]), list(manual_counts[0]))
+ self.assertEqual(list(fast_counts[1]), list(manual_counts[1]))
+ self.assertEqual(list(fast_counts[2]), list(manual_counts[2]))
+ self.assertEqual(list(fast_counts[3]), list(manual_counts[3]))
def test_count_coverage_quality_filter(self):
- chr = 'chr1'
+ chrom = 'chr1'
start = 0
stop = 2000
- manual_counts = self.count_coverage_python(self.samfile, chr, start, stop,
- lambda read: True,
- quality_threshold=0)
- fast_counts = self.samfile.count_coverage(chr, start, stop,
- read_callback=lambda read: True,
- quality_threshold=15)
+ manual_counts = self.count_coverage_python(
+ self.samfile, chrom, start, stop,
+ lambda read: True,
+ quality_threshold=0)
+ fast_counts = self.samfile.count_coverage(
+ chrom, start, stop,
+ read_callback=lambda read: True,
+ quality_threshold=15)
# we filtered harder, should be less
for i in range(4):
for r in range(start, stop):
self.assertTrue(fast_counts[i][r] <= manual_counts[i][r])
def test_count_coverage_read_callback(self):
- chr = 'chr1'
+ chrom = 'chr1'
start = 0
stop = 2000
- manual_counts = self.count_coverage_python(self.samfile, chr, start, stop,
- lambda read: read.flag & 0x10,
- quality_threshold=0)
- fast_counts = self.samfile.count_coverage(chr, start, stop,
- read_callback=lambda read: True,
- quality_threshold=0)
+ manual_counts = self.count_coverage_python(
+ self.samfile, chrom, start, stop,
+ lambda read: read.flag & 0x10,
+ quality_threshold=0)
+ fast_counts = self.samfile.count_coverage(
+ chrom, start, stop,
+ read_callback=lambda read: True,
+ quality_threshold=0)
for i in range(4):
for r in range(start, stop):
self.assertTrue(fast_counts[i][r] >= manual_counts[i][r])
- fast_counts = self.samfile.count_coverage(chr, start, stop,
- read_callback=lambda read: read.flag & 0x10,
- quality_threshold=0)
+ fast_counts = self.samfile.count_coverage(
+ chrom, start, stop,
+ read_callback=lambda read: read.flag & 0x10,
+ quality_threshold=0)
self.assertEqual(fast_counts[0], manual_counts[0])
self.assertEqual(fast_counts[1], manual_counts[1])
@@ -1939,35 +2069,23 @@ class TestPileup(unittest.TestCase):
self.assertEqual(fast_counts[3], manual_counts[3])
def test_count_coverage_read_all(self):
- samfile = pysam.AlignmentFile(
- "test_count_coverage_read_all.bam", 'wb', template=self.samfile)
- for ii, read in enumerate(self.samfile.fetch()):
- # if ii % 2 == 0: # setting BFUNMAP makes no sense...
- #read.flag = read.flag | 0x4
- if ii % 3 == 0:
- read.flag = read.flag | 0x100
- if ii % 5 == 0:
- read.flag = read.flag | 0x200
- if ii % 7 == 0:
- read.flag = read.flag | 0x400
- samfile.write(read)
- samfile.close()
- pysam.index("test_count_coverage_read_all.bam")
samfile = pysam.AlignmentFile("test_count_coverage_read_all.bam")
- chr = 'chr1'
+ chrom = 'chr1'
start = 0
stop = 2000
def filter(read):
return not (read.flag & (0x4 | 0x100 | 0x200 | 0x400))
- fast_counts = samfile.count_coverage(chr, start, stop,
- read_callback='all',
- #read_callback = lambda read: ~(read.flag & (0x4 | 0x100 | 0x200 | 0x400)),
- quality_threshold=0)
- manual_counts = samfile.count_coverage(chr, start, stop,
- read_callback=lambda read: not(
- read.flag & (0x4 | 0x100 | 0x200 | 0x400)),
- quality_threshold=0)
+ fast_counts = samfile.count_coverage(
+ chrom, start, stop,
+ read_callback='all',
+ #read_callback = lambda read: ~(read.flag & (0x4 | 0x100 | 0x200 | 0x400)),
+ quality_threshold=0)
+ manual_counts = samfile.count_coverage(
+ chrom, start, stop,
+ read_callback=lambda read: not(
+ read.flag & (0x4 | 0x100 | 0x200 | 0x400)),
+ quality_threshold=0)
os.unlink("test_count_coverage_read_all.bam")
os.unlink("test_count_coverage_read_all.bam.bai")
diff --git a/tests/SamFile_test.py b/tests/SamFile_test.py
index 889ff96..79b11a5 100644
--- a/tests/SamFile_test.py
+++ b/tests/SamFile_test.py
@@ -13,6 +13,7 @@ import sys
import collections
import subprocess
import logging
+import array
from TestUtils import checkBinaryEqual, checkURL
IS_PYTHON3 = sys.version_info[0] >= 3
@@ -355,7 +356,7 @@ class TestIO(unittest.TestCase):
If *use_template* is set, the header is copied from infile
using the template mechanism, otherwise target names and
- lengths are passed explicitely.
+ lengths are passed explicitly.
'''
@@ -1080,7 +1081,7 @@ class TestContextManager(unittest.TestCase):
with pysam.Samfile(os.path.join(DATADIR, 'ex1.bam'),
'rb') as samfile:
samfile.fetch()
- self.assertEqual(samfile._isOpen(), False)
+ self.assertEqual(samfile.closed, True)
class TestExceptions(unittest.TestCase):
@@ -1523,12 +1524,36 @@ class TestEmptyHeader(unittest.TestCase):
s = pysam.Samfile(os.path.join(DATADIR, 'example_empty_header.bam'))
self.assertEqual(s.header, {'SQ': [{'LN': 1000, 'SN': 'chr1'}]})
+COMPARE_BTAG = [100, 1, 91, 0, 7, 101, 0, 201, 96, 204,
+ 0, 0, 87, 109, 0, 7, 97, 112, 1, 12, 78,
+ 197, 0, 7, 100, 95, 101, 202, 0, 6, 0, 1,
+ 186, 0, 84, 0, 244, 0, 0, 324, 0, 107, 195,
+ 101, 113, 0, 102, 0, 104, 3, 0, 101, 1, 0,
+ 212, 6, 0, 0, 1, 0, 74, 1, 11, 0, 196, 2,
+ 197, 103, 0, 108, 98, 2, 7, 0, 1, 2, 194,
+ 0, 180, 0, 108, 0, 203, 104, 16, 5, 205,
+ 0, 0, 0, 1, 1, 100, 98, 0, 0, 204, 6, 0,
+ 79, 0, 0, 101, 7, 109, 90, 265, 1, 27, 10,
+ 109, 102, 9, 0, 292, 0, 110, 0, 0, 102,
+ 112, 0, 0, 84, 100, 103, 2, 81, 126, 0, 2,
+ 90, 0, 15, 96, 15, 1, 0, 2, 0, 107, 92, 0,
+ 0, 101, 3, 98, 15, 102, 13, 116, 116, 90, 93,
+ 198, 0, 0, 0, 199, 92, 26, 495, 100, 5, 0,
+ 100, 5, 209, 0, 92, 107, 90, 0, 0, 0, 0, 109,
+ 194, 7, 94, 200, 0, 40, 197, 0, 11, 0, 0, 112,
+ 110, 6, 4, 200, 28, 0, 196, 0, 203, 1, 129,
+ 0, 0, 1, 0, 94, 0, 1, 0, 107, 5, 201, 3, 3, 100,
+ 0, 121, 0, 7, 0, 1, 105, 306, 3, 86, 8, 183, 0,
+ 12, 163, 17, 83, 22, 0, 0, 1, 8, 109, 103, 0, 0,
+ 295, 0, 200, 16, 172, 3, 16, 182, 3, 11, 0, 0,
+ 223, 111, 103, 0, 5, 225, 0, 95]
+
class TestBTagSam(unittest.TestCase):
'''see issue 81.'''
- compare = [[100, 1, 91, 0, 7, 101, 0, 201, 96, 204, 0, 0, 87, 109, 0, 7, 97, 112, 1, 12, 78, 197, 0, 7, 100, 95, 101, 202, 0, 6, 0, 1, 186, 0, 84, 0, 244, 0, 0, 324, 0, 107, 195, 101, 113, 0, 102, 0, 104, 3, 0, 101, 1, 0, 212, 6, 0, 0, 1, 0, 74, 1, 11, 0, 196, 2, 197, 103, 0, 108, 98, 2, 7, 0, 1, 2, 194, 0, 180, 0, 108, 0, 203, 104, 16, 5, 205, 0, 0, 0, 1, 1, 100, 98, 0, 0, 204, 6, 0, 79, 0, 0, 101, 7, 109, 90, 265, 1, 27, 10, 109, 102, 9, 0, 292, 0, 110, 0, 0, 102, 112, 0, 0, 84, 10 [...]
+ compare = [COMPARE_BTAG,
[-100, 200, -300, -400],
[-100, 12],
[12, 15],
@@ -1536,26 +1561,47 @@ class TestBTagSam(unittest.TestCase):
filename = os.path.join(DATADIR, 'example_btag.sam')
- def testRead(self):
+ read0 = [('RG', 'QW85I'),
+ ('PG', 'tmap'),
+ ('MD', '140'),
+ ('NM', 0),
+ ('AS', 140),
+ ('FZ', array.array('H', COMPARE_BTAG)),
+ ('XA', 'map2-1'),
+ ('XS', 53),
+ ('XT', 38),
+ ('XF', 1),
+ ('XE', 0)]
+
+ def testReadTags(self):
s = pysam.Samfile(self.filename)
for x, read in enumerate(s):
+ tags = read.tags
if x == 0:
- self.assertEqual(read.tags, [('RG', 'QW85I'), ('PG', 'tmap'), ('MD', '140'), ('NM', 0), ('AS', 140), ('FZ', [100, 1, 91, 0, 7, 101, 0, 201, 96, 204, 0, 0, 87, 109, 0, 7, 97, 112, 1, 12, 78, 197, 0, 7, 100, 95, 101, 202, 0, 6, 0, 1, 186, 0, 84, 0, 244, 0, 0, 324, 0, 107, 195, 101, 113, 0, 102, 0, 104, 3, 0, 101, 1, 0, 212, 6, 0, 0, 1, 0, 74, 1, 11, 0, 196, 2, 197, 103, 0, 108, 98, 2, 7, 0, 1, 2, 194, 0, 180, 0, 108, 0, 203, 104, 16, 5, 205, 0, 0, 0, 1, 1, 100, 98, 0, 0, 20 [...]
- )
-
- fz = dict(read.tags)["FZ"]
+ self.assertEqual(tags, self.read0)
+
+ fz = list(dict(tags)["FZ"])
self.assertEqual(fz, self.compare[x])
- self.assertEqual(read.opt("FZ"), self.compare[x])
-
- def testWrite(self):
+ self.assertEqual(list(read.opt("FZ")), self.compare[x])
+ self.assertEqual(tags, read.get_tags())
+ for tag, value in tags:
+ self.assertEqual(value, read.get_tag(tag))
+
+ def testReadWriteTags(self):
s = pysam.Samfile(self.filename)
for read in s:
before = read.tags
- read.tags = read.tags
- after = read.tags
- self.assertEqual(after, before)
+ read.tags = before
+ self.assertEqual(read.tags, before)
+
+ read.set_tags(before)
+ self.assertEqual(read.tags, before)
+
+ for tag, value in before:
+ read.set_tag(tag, value)
+ self.assertEqual(value, read.get_tag(tag))
class TestBTagBam(TestBTagSam):
diff --git a/tests/TestUtils.py b/tests/TestUtils.py
index 3533f00..5cc048a 100644
--- a/tests/TestUtils.py
+++ b/tests/TestUtils.py
@@ -68,9 +68,9 @@ def checkSamtoolsViewEqual(filename1, filename2,
l1 = sorted(l1[:-1].split("\t"))
l2 = sorted(l2[:-1].split("\t"))
if l1 != l2:
- print "mismatch in line %i" % n
- print l1
- print l2
+ print ("mismatch in line %i" % n)
+ print (l1)
+ print (l2)
return False
else:
return False
diff --git a/tests/faidx_test.py b/tests/faidx_test.py
index c454e83..ee448c3 100644
--- a/tests/faidx_test.py
+++ b/tests/faidx_test.py
@@ -9,9 +9,9 @@ class TestFastaFile(unittest.TestCase):
sequences = {
'chr1':
- b"CACTAGTGGCTCATTGTAAATGTGTGGTTTAACTCGTCCATGGCCCAGCATTAGGGAGCTGTGGACCCTGCAGCCTGGCTGTGGGGGCCGCAGTGGCTGAGGGGTGCAGAGCCGAGTCACGGGGTTGCCAGCACAGGGGCTTAACCTCTGGTGACTGCCAGAGCTGCTGGCAAGCTAGAGTCCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGTAATGAAAACTATATTTATGCTATTCAGTTCTAAATATAGAAATTGAAACAGCTGTGTTTAGTGCCTTTGTTCAACCCCCTTGCAACAACCTTGAGAACCCCAGGGAATTTGTCAATGTCAGGGAAGGAGCATTTTGTCAGTTACCAAATGTGTTTATTACCAGAGGGATGGAGGGAAGAGGGACGCTGAAGAACTTTGATGCCCTCTTCTTCCAAAGATGAAACGCGTAACTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCAAT [...]
+ "CACTAGTGGCTCATTGTAAATGTGTGGTTTAACTCGTCCATGGCCCAGCATTAGGGAGCTGTGGACCCTGCAGCCTGGCTGTGGGGGCCGCAGTGGCTGAGGGGTGCAGAGCCGAGTCACGGGGTTGCCAGCACAGGGGCTTAACCTCTGGTGACTGCCAGAGCTGCTGGCAAGCTAGAGTCCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGTAATGAAAACTATATTTATGCTATTCAGTTCTAAATATAGAAATTGAAACAGCTGTGTTTAGTGCCTTTGTTCAACCCCCTTGCAACAACCTTGAGAACCCCAGGGAATTTGTCAATGTCAGGGAAGGAGCATTTTGTCAGTTACCAAATGTGTTTATTACCAGAGGGATGGAGGGAAGAGGGACGCTGAAGAACTTTGATGCCCTCTTCTTCCAAAGATGAAACGCGTAACTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCAATG [...]
'chr2':
- b"TTCAAATGAACTTCTGTAATTGAAAAATTCATTTAAGAAATTACAAAATATAGTTGAAAGCTCTAACAATAGACTAAACCAAGCAGAAGAAAGAGGTTCAGAACTTGAAGACAAGTCTCTTATGAATTAACCCAGTCAGACAAAAATAAAGAAAAAAATTTTAAAAATGAACAGAGCTTTCAAGAAGTATGAGATTATGTAAAGTAACTGAACCTATGAGTCACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGTCTTGCTAGAGATTTAGACATCTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGATACATTGCAAGACAGACTTCATCAAGATATGTAGTCATCAGACTATCTAAAGTCAACATGAAGGAAAAAAATTCTAAAATCAGCAAGAGAAAAGCATACAGTCATCTATAAAGGAAATCCCATCA [...]
+ "TTCAAATGAACTTCTGTAATTGAAAAATTCATTTAAGAAATTACAAAATATAGTTGAAAGCTCTAACAATAGACTAAACCAAGCAGAAGAAAGAGGTTCAGAACTTGAAGACAAGTCTCTTATGAATTAACCCAGTCAGACAAAAATAAAGAAAAAAATTTTAAAAATGAACAGAGCTTTCAAGAAGTATGAGATTATGTAAAGTAACTGAACCTATGAGTCACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGTCTTGCTAGAGATTTAGACATCTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGATACATTGCAAGACAGACTTCATCAAGATATGTAGTCATCAGACTATCTAAAGTCAACATGAAGGAAAAAAATTCTAAAATCAGCAAGAGAAAAGCATACAGTCATCTATAAAGGAAATCCCATCAG [...]
}
def setUp(self):
@@ -27,25 +27,20 @@ class TestFastaFile(unittest.TestCase):
# test 0:x
self.assertEqual(seq[:x], self.file.fetch(id, None, x))
- # unknown sequence returns ""
- # change: should be an IndexError
- self.assertEqual(b"", self.file.fetch("chr12"))
+ # unknown sequence raises KeyError
+ self.assertRaises(KeyError, self.file.fetch, "chr12")
def testOutOfRangeAccess(self):
'''test out of range access.'''
# out of range access returns an empty string
for contig, s in self.sequences.items():
- self.assertEqual(self.file.fetch(contig, len(s), len(s) + 1), b"")
-
- self.assertEqual(self.file.fetch("chr3", 0, 100), b"")
+ self.assertEqual(self.file.fetch(contig, len(s), len(s) + 1), "")
def testFetchErrors(self):
self.assertRaises(ValueError, self.file.fetch)
- self.assertRaises(IndexError, self.file.fetch, "chr1", -1, 10)
+ self.assertRaises(ValueError, self.file.fetch, "chr1", -1, 10)
self.assertRaises(ValueError, self.file.fetch, "chr1", 20, 10)
-
- # does not work yet
- # self.assertRaises( KeyError, self.file.fetch, "chrX" )
+ self.assertRaises(KeyError, self.file.fetch, "chr3", 0, 100)
def testLength(self):
self.assertEqual(len(self.file), 2)
@@ -58,30 +53,82 @@ class TestFastaFile(unittest.TestCase):
self.file.close()
-class TestFastqFile(unittest.TestCase):
+class TestFastxFileFastq(unittest.TestCase):
+
+ filetype = pysam.FastxFile
+ filename = "faidx_ex1.fq"
+ persist = True
def setUp(self):
- self.file = pysam.FastqFile(os.path.join(DATADIR, "ex1.fq"))
+ self.file = self.filetype(os.path.join(DATADIR, self.filename),
+ persist=self.persist)
+ self.has_quality = self.filename.endswith('.fq')
+
+ def checkFirst(self, s):
+ # test first entry
+ self.assertEqual(s.sequence, "GGGAACAGGGGGGTGCACTAATGCGCTCCACGCCC")
+ self.assertEqual(s.name, "B7_589:1:101:825:28")
+ if self.has_quality:
+ self.assertEqual(s.quality, "<<86<<;<78<<<)<;4<67<;<;<74-7;,;8,;")
+ self.assertEqual(list(s.get_quality_array()),
+ [ord(x) - 33 for x in s.quality])
+ self.assertEqual(str(s),
+ "@B7_589:1:101:825:28\n"
+ "GGGAACAGGGGGGTGCACTAATGCGCTCCACGCCC\n"
+ "+\n"
+ "<<86<<;<78<<<)<;4<67<;<;<74-7;,;8,;")
+
+ else:
+ self.assertEqual(s.quality, None)
+ self.assertEqual(s.get_quality_array(), None)
+ self.assertEqual(str(s),
+ ">B7_589:1:101:825:28\n"
+ "GGGAACAGGGGGGTGCACTAATGCGCTCCACGCCC")
+
+ def checkLast(self, s):
+ self.assertEqual(s.sequence, "TAATTGAAAAATTCATTTAAGAAATTACAAAATAT")
+ self.assertEqual(s.name, "EAS56_65:8:64:507:478")
+ if self.has_quality:
+ self.assertEqual(s.quality, "<<<<<;<<<<<<<<<<<<<<<;;;<<<;<<8;<;<")
+ self.assertEqual(list(s.get_quality_array()),
+ [ord(x) - 33 for x in s.quality])
+ else:
+ self.assertEqual(s.quality, None)
+ self.assertEqual(s.get_quality_array(), None)
def testCounts(self):
self.assertEqual(len([x for x in self.file]), 3270)
def testMissingFile(self):
- self.assertRaises(IOError, pysam.FastqFile, "nothere.fq")
+ self.assertRaises(IOError, self.filetype, "nothere.fq")
def testSequence(self):
- s = self.file.__next__()
- # test first entry
- self.assertEqual(s.sequence, b"GGGAACAGGGGGGTGCACTAATGCGCTCCACGCCC")
- self.assertEqual(s.quality, b"<<86<<;<78<<<)<;4<67<;<;<74-7;,;8,;")
- self.assertEqual(s.name, b"B7_589:1:101:825:28")
-
- for s in self.file:
+ first = self.file.__next__()
+ self.checkFirst(first)
+ for last in self.file:
pass
- # test last entry
- self.assertEqual(s.sequence, b"TAATTGAAAAATTCATTTAAGAAATTACAAAATAT")
- self.assertEqual(s.quality, b"<<<<<;<<<<<<<<<<<<<<<;;;<<<;<<8;<;<")
- self.assertEqual(s.name, b"EAS56_65:8:64:507:478")
+ self.checkLast(last)
+
+ # test for persistence
+ if self.persist:
+ self.checkFirst(first)
+ else:
+ self.checkLast(first)
+
+
+# Test for backwards compatibility
+class TestFastqFileFastq(TestFastxFileFastq):
+ filetype = pysam.FastqFile
+
+
+# Test for backwards compatibility
+class TestFastxFileFasta(TestFastxFileFastq):
+ filetype = pysam.FastqFile
+ filename = "faidx_ex1.fa"
+
+
+class TestFastxFileFastqStream(TestFastxFileFastq):
+ persist = False
if __name__ == "__main__":
unittest.main()
diff --git a/tests/pysam_data/Makefile b/tests/pysam_data/Makefile
index 8b0964a..6166fd2 100644
--- a/tests/pysam_data/Makefile
+++ b/tests/pysam_data/Makefile
@@ -48,7 +48,7 @@ ex1.bam:ex1.sam.gz ex1.fa.fai
samtools index $<
ex1.pileup.gz:ex1.bam ex1.fa
- samtools pileup -cf ex1.fa ex1.bam | gzip > ex1.pileup.gz
+ samtools mpileup -f ex1.fa ex1.bam | gzip > ex1.pileup.gz
ex2_truncated.bam: ex2.bam
head -c 124000 ex2.bam > ex2_truncated.bam
diff --git a/tests/pysam_data/faidx_ex1.fa b/tests/pysam_data/faidx_ex1.fa
new file mode 100644
index 0000000..d16e255
--- /dev/null
+++ b/tests/pysam_data/faidx_ex1.fa
@@ -0,0 +1,6540 @@
+>B7_589:1:101:825:28
+GGGAACAGGGGGGTGCACTAATGCGCTCCACGCCC
+>B7_589:1:101:825:28
+TGTGTCCATGTACACACGCTGTCCTATGTACTTAT
+>B7_589:1:110:543:934
+AAGAATTGTAAAAGTCAAAATTAAAGTTCAATACT
+>B7_589:1:110:543:934
+ACAAGCCAGAAGAGATTGGATCTAATTTTTGGACT
+>B7_589:1:122:337:968
+ACAGGGGGCTGCGCGGTTTCCCATCATGAAGCACT
+>B7_589:1:122:337:968
+GCTTTACTGTCTAAACTATGAAGAGACTATTGCCA
+>B7_589:1:122:77:789
+ACTATATTTATGCTATTCAGTTCTAAATATAGAAA
+>B7_589:1:122:77:789
+GGACGCTGAAGAACTTTGATGCCCTCTTCTTCCAA
+>B7_589:1:168:69:249
+ATATGCTGTTTACAAGAAACTCATTAATAAAGACA
+>B7_589:1:168:69:249
+TTCAGCAAGAAGATATAACCATCCTACTAAATACA
+>B7_589:1:29:529:379
+CAGTTTCTGCCCCCAGCATGGTTGTACTGGGCAAT
+>B7_589:1:29:529:379
+GACTCTATCCCAAATTCCCAATTACGTCCTATCTT
+>B7_589:2:30:644:942
+TACCTAATTGGTACAATGTACAATATTCTGATGAT
+>B7_589:2:30:644:942
+TATATCAGATAAAGCACACTTTAAATCAACAACAG
+>B7_589:2:73:730:487
+AGGAAGAAAAGGTCTTTCAAAAGGTGATGTGTGTT
+>B7_589:2:73:730:487
+TAGCCATTTCTTTTGGCATTTGCCTTCAGACCCTA
+>B7_589:2:9:49:661
+TACACACAAAAGTACAAAACTCACAGGTTTTATAA
+>B7_589:2:9:49:661
+TGCTAAACTAAGCATCATAAATGAAGCGGAAATAA
+>B7_589:3:71:478:175
+ACAGAGCTTTCAAGAAGTATGAGATTATGTAAAGT
+>B7_589:3:71:478:175
+TAGACATCTAAATGAAAGAGGCTCAAAGAATGCCA
+>B7_589:3:82:13:897
+ATACAGTCATCTATAAAGGAAATCCCAGCAGAATA
+>B7_589:3:82:13:897
+CATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCT
+>B7_589:4:54:989:654
+ACTTATCATGACTCTATCCCAAATTCCCAATTACG
+>B7_589:4:54:989:654
+TCCCTGCCCCATCTCTTGTAATCTCTCTCCTTTTT
+>B7_589:5:147:405:738
+AGGGATTAAATTCCCCCACTTAAGAGATATAGATT
+>B7_589:5:147:405:738
+ATCAGATAAAGCACACTTTAAATCAACAACAGTAA
+>B7_589:5:198:564:731
+ACAAAGGAGGTCATCATACAATGATAAAAAGATCA
+>B7_589:5:198:564:731
+ATAGATTGGCAGAACAGATTTAAAAACATGAACTA
+>B7_589:5:50:950:562
+CTATTTTTGTCTTGACACCCTACTAATATTTGTCT
+>B7_589:5:50:950:562
+GCCCCATCTCTTGTAATCTCTCTCCTTTTTGCTGC
+>B7_589:5:68:440:424
+ACACTTTAAATCAACAACAGTAAAATAAAACAAAG
+>B7_589:5:68:440:424
+TGGTACAATGTACAATATTCTGATGATGGTTACAC
+>B7_589:6:108:958:42
+AAAGTACAAAACTCACAGGTTTTATAAAACAATTA
+>B7_589:6:108:958:42
+TATGCCCTGCTAAACTAAGCATCATAAATGAAGGG
+>B7_589:6:114:714:317
+AACCCCAGGGAATTTGTCAATGTCAGGGAAGGAGC
+>B7_589:6:114:714:317
+TGCCAGCACAGGGGCTTAACCTCTGGTGACTGCCA
+>B7_589:6:120:14:944
+CAAAAGGTGATGTGTGTTCTCATCAACCTCATACA
+>B7_589:6:120:14:944
+CAAAGATGAAACGCGTAACTGCGCTCTCATTCACT
+>B7_589:6:33:356:636
+TTTTTTTCTTTTCTCTTTTTTTTTTTTTTTTTTTT
+>B7_589:7:112:203:90
+CCTGTCACCCAATGGACCTGTGATATCTGGATTCT
+>B7_589:7:112:203:90
+CTTGAGAACCCCAGGGAATTTGTCAATGTCAGGGA
+>B7_589:7:154:26:712
+ATTGAGACTACAGAGCAACTAGGTAAAAAATTAAC
+>B7_589:7:154:26:712
+TTAATAAAGACATGAGTTCAGGTAAAGGGGTGGAA
+>B7_589:7:72:916:763
+CTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGGT
+>B7_589:7:72:916:763
+GTCCTATCTTCTTCTTAGGGAAGAACAGCTTAGGT
+>B7_589:7:76:306:561
+GGTGGAAAAAGATGTTCTACGCAAACAGAAACCAA
+>B7_589:7:76:306:561
+TACAGAGCAACTAGGTAAAAAATTAACATTACAAC
+>B7_589:7:93:634:323
+CTTAAAGAAAAAAAAACCTGTCAAACACGAATGTT
+>B7_589:7:93:634:323
+TAAAGTTCAATACTCACCATCATAAATACACACAA
+>B7_589:8:113:968:19
+GAAAACTATATTTATGCTATTCAGTTCTAAATATA
+>B7_589:8:118:829:36
+AGTATTGGTAAAGATGTGGGGAAAAAAGTAAACTC
+>B7_589:8:118:829:36
+TAATTGGTACAATGTACAATATTCTGATGATGGTT
+>B7_589:8:139:727:808
+AAGTAAATAAAACACATAGCTAAAACTAAAAAAGC
+>B7_589:8:139:727:808
+ACAAATACTACTAGACCTAAGAGGGATGAGAAATT
+>B7_589:8:157:935:374
+CAGTTACCAAATGTGTTTATTACCAGAGGGATGGA
+>B7_589:8:157:935:374
+TCTTCATCCTGGACCCTGAGAGATTCTGCAGCCCA
+>B7_589:8:2:434:715
+AGAGAGAACTTCCCTGGAGGTCTGATGGCGTTTCT
+>B7_589:8:2:434:715
+CTTGTTGTTGGTTTTCTGTTTCTTTGTTTGATTTT
+>B7_589:8:74:674:124
+CACTGAACTTCCACGTCTCATCTAGGGGAACAGGG
+>B7_589:8:74:674:124
+TTCTTATCTGCACATTACTACCCTGCAATTAATAT
+>B7_591:1:191:462:705
+CAGATCCAGATTGCTTGTGGTCTGACAGGCTGCAAC
+>B7_591:1:191:462:705
+CATCCCTGTCTTACTTCCAGCTCCCCAGAGGGAAAG
+>B7_591:1:60:837:923
+CATCAACCGCATACACTCACATGGTTTAGGGGTATA
+>B7_591:1:60:837:923
+TTCACGCCAGCTCCCTGTCACCCAATGGACCTCTGA
+>B7_591:2:123:924:645
+TATATCAGATAAAGCACACTTTAAATCAACAACAGT
+>B7_591:2:123:924:645
+TGAATAAAAAGGGATTAAATTCCCCCACTTAAGAGA
+>B7_591:2:134:868:252
+AAGAACTTTGATGCCCTCTTCTTCCAAAGATGAAAC
+>B7_591:2:134:868:252
+ATGAACAACAGGAAGAAAAGGTCTTTCAAAAGGTGA
+>B7_591:2:13:100:876
+ACAGGGATTCCTGAGGAAAAAGAAAAAGTGAGAAGT
+>B7_591:2:13:100:876
+AGAATATATAAAGTCAACATGAAGGAAAAAAATTCT
+>B7_591:2:223:583:968
+AATATAGTTGAAAGCTCTAACAATAGACTAAACCAA
+>B7_591:2:223:583:968
+TATGAGGCACAGGTATTCCTGAGGAAAAAGAAAAAG
+>B7_591:2:240:603:890
+GCTCCCAAGAGGGAAAGCTTTCAACGCTTCTAGCCA
+>B7_591:2:240:603:890
+TCACAATGAACAACAGGAAGAAAAGGTCTTTCAAAA
+>B7_591:2:279:124:41
+GAATTAACCCAGTCAGACAAAAANNAAGAAAAAAGA
+>B7_591:2:279:124:41
+GCTAGAGATTTAGACATCTAAATGAAAGAGGCTCAA
+>B7_591:2:27:280:592
+AATAACAATGGGCTTCTCAGCGGAAACCTTACAAGC
+>B7_591:2:27:280:592
+AGAGATTTAGACATCTAAATGAAAGAGGCTCAAAGA
+>B7_591:2:309:798:997
+TTTTTTTTTTTTTTTTTTCTCTTTTTTTTTTTTTTT
+>B7_591:2:323:639:311
+AAGCCGTTCTATTTGTAATGAAAACTATATTTAGGC
+>B7_591:2:323:639:311
+TACCAAATGTGTTTATTACCAGAGGGATGGAGGGAA
+>B7_591:2:46:220:58
+CAAATCTGCGCTTGTACTTCTAAATCTATAAAAAAA
+>B7_591:2:46:220:58
+TTCCACTTTGGAAAACAATTTGGTAATTTCGTTTTT
+>B7_591:3:168:69:605
+TACCCGAGGGATGGAGGGTAGAGGGACGCTGAAGTG
+>B7_591:3:168:69:605
+TCTGACAGGCGGCAACTGTGAGCCATCACAATGAAC
+>B7_591:3:179:496:161
+AAAAACATGAACTAACTATATGCTGTTTACAAGAAA
+>B7_591:3:179:496:161
+AAGTACAAAACTCACAGGTTTTATAAAACAATTAAT
+>B7_591:3:277:458:330
+AATGTCAGGGAAGGAGCCTTTTGTCAGTTACCAAAT
+>B7_591:3:277:458:330
+TGATATCTGGATTCTGGGAAATTCTTCATCCTGGAC
+>B7_591:3:291:404:199
+TATAAAACAATTAATTGAGACTACAGAGCAACTAGG
+>B7_591:3:291:404:199
+TGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAAGC
+>B7_591:3:305:565:952
+GTCCGTGTCCTCCCATCTGGCCTCGTCCACACTGGT
+>B7_591:3:305:565:952
+TAATACTATGTTTCTTATCTGCACATTACTACCCTG
+>B7_591:3:45:294:380
+ATAATTGTGTCCATGTACACACGATGTCATATGTAC
+>B7_591:3:45:294:380
+CCTCGTCCACACTGGTTCGCTTGAAAGCTTGGGCTG
+>B7_591:4:103:111:720
+CAGTTACCAAATGTGTTTATTACCAGAGGGATGGAG
+>B7_591:4:103:111:720
+TCTTCATCCTGGACCCTGAGAGATTCTGCAGCCCAG
+>B7_591:4:159:508:571
+CAAAACCTCATATATCAATATTAACTTTGAATAAAA
+>B7_591:4:159:508:571
+TGGAAAAAGATGTTCTACGCAAACAGAAACCAAATG
+>B7_591:4:216:650:516
+GAACAGCTTAGGTATCAATTTGGTGTTCTGTGTAAA
+>B7_591:4:216:650:516
+TTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTT
+>B7_591:4:329:339:408
+CAATCCAGAAGAGATTGGATCTAATTTTTGGACTTC
+>B7_591:4:329:339:408
+TAAATGAAAGAGGCTCAAAGAATGCCAGGAAGATAC
+>B7_591:4:92:411:955
+GGTAAAGGGGTGGAAAAAGATGTTCTACGCAAACAG
+>B7_591:4:92:411:955
+TACTAAATACATATGCACCTAACACAAGACTACCCA
+>B7_591:5:124:978:501
+AATTTGGTAATTTCGTTTTTTTTTTTTTCTTTTCTC
+>B7_591:5:124:978:501
+ATGTAACAAATCTGCGCTTGTACTTCTAAATCTATA
+>B7_591:5:134:751:831
+AGCTCCCTGTCACCCAATGGACCTGTGATATCTGGA
+>B7_591:5:134:751:831
+ATACACACACATGGTTTAGGGGTATAATACCTCTAC
+>B7_591:5:243:557:560
+AAAGAGGCTCAAAGAATGCCAGGAAGATACATTGCA
+>B7_591:5:243:557:560
+CTAAGCAGAAACCTTACAAGCCAGAAGAGATTGGAT
+>B7_591:5:254:542:848
+CCAAGCAGAAGAAAGAGGTTCAGAACTTGAAGACAA
+>B7_591:5:254:542:848
+CTGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAAAAC
+>B7_591:5:289:132:526
+CACCCGGTCCCTGCCCCATCTCTTGTAATCTCTCTC
+>B7_591:5:289:132:526
+TCTATTTTTGTCTTGACACCCAACTAATATTTGTCT
+>B7_591:5:42:540:501
+CTATATTTATGCTATTCAGTTCTAAATATAGAAATT
+>B7_591:5:90:828:633
+CTGCAGCCCAGATCCAGATTGCTTGTGGTCTGACAG
+>B7_591:5:90:828:633
+GGATGGAGGGAAGAGGGACGCTGAAGAACTTTGATG
+>B7_591:6:11:646:628
+GTACTTATCATGACTCTATCCCAAATTCCCAATTAC
+>B7_591:6:11:646:628
+TTCTGCCCCCAGCATGGTTGTACTGGGCAATACATG
+>B7_591:6:155:12:674
+CTATATTTATGCTATTCAGTTCTAAATATAGAAATT
+>B7_591:6:181:191:418
+AAACTATATTTATGCTATTCAGTTCTAAATATAGAA
+>B7_591:6:181:191:418
+AGGGAAGAGGGACGCTGAAGAACTTTGATGCCCTCT
+>B7_591:6:190:42:671
+TATTGCCAGATGAACCACACATTAATACTATGTTTC
+>B7_591:6:190:42:671
+TGTGTAAAGTCTCAGGGAGCCGTCCGTGTCCTCCCA
+>B7_591:6:29:575:453
+TACTACCCTGCAATTAATATAATTGTGTCCATTTAC
+>B7_591:6:29:575:453
+TCTGGCCTCGTCCACACTGGTTCTCTTGAAAGCTTG
+>B7_591:7:116:814:89
+ACTATGCTAAGTATTGGTAAAGATGTGGGGAAAAAA
+>B7_591:7:116:814:89
+CCTAATTGGTACAATGTACAATATTCTGATGATGGT
+>B7_591:7:129:956:115
+AGTTTCTGCCCCCAGCATGGTTGTACTGGGCAATAC
+>B7_591:7:129:956:115
+GCTCCCCAGAGGGAAAGCTTTCAACGCTTCTAGCCA
+>B7_591:7:157:447:758
+AAAGATGTTCTACGCAAACAGAAACCAAATGAGAGA
+>B7_591:7:157:447:758
+ACAAAACCTCATATATCAATATTAACTTTGAATAAA
+>B7_591:7:200:192:373
+AGTGCCTTTGTTCACATAGACCCCCTTGCAACAACC
+>B7_591:7:200:192:373
+CTCTCATTCACTCCAGCTCCCTGTCACCCAATGGAC
+>B7_591:7:22:632:176
+AAAGGAGGTCATCATACAATGATAAAAAGATCAATT
+>B7_591:7:22:632:176
+AGATATAGATTGGCAGAACAGATTTAAAAACATGAA
+>B7_591:7:68:242:834
+AAATAAAAAAGCAAAAACAAAAACTATGCTAAGTAT
+>B7_591:7:68:242:834
+TACTACTAGACCTAAGAGGGATGAGAAATTACCTAA
+>B7_591:7:89:67:709
+TTTTTTTTTTTGTCTTCTCTTTTTTTTTTTTTTTTT
+>B7_591:8:4:841:340
+TTCAAATGAACTTCTGTAATTGAAAAATTCATTTAA
+>B7_593:1:12:158:458
+CTGGGAAATTCTTCATCCTGGACCCTGAGAGATTCT
+>B7_593:1:12:158:458
+TAATAATGCTACATGGATGATTATGAAATCAATGTT
+>B7_593:1:189:876:833
+CAAGACTACCCAGATTCATAAAACAAATACTACTAG
+>B7_593:1:189:876:833
+TTAAAATTTAACAAAAGTAAATAAAACACATAGCTA
+>B7_593:1:19:695:59
+AACAGGAACAAAACCTCATATATCAATATTAACTTT
+>B7_593:1:19:695:59
+GTGGAAAAAGATGTTCTACGCAAACAGAAACCAAAT
+>B7_593:1:200:559:765
+GGAAGGAGCATTTTGTCAGTTACCAAATGTGTTTAT
+>B7_593:1:200:559:765
+TGGACCCTGAGAGATTCTGCAGCCCAGATCCAGATT
+>B7_593:1:215:861:605
+GAAAAACTATTTGAGGAAGTAATTGGGGAAAACCTC
+>B7_593:1:215:861:605
+NAAGACAAGTCTCTTATGAATTAACCCAGTCAGACA
+>B7_593:1:36:485:632
+AAAAGGTCTTTCAAAAGGTGATGTGTGTTCTCATCA
+>B7_593:1:36:485:632
+GGCATTTGCCTTCAGACCCTACACGAATGCGTCTCT
+>B7_593:1:85:361:418
+AGCCAGAAGAGATTGGATCTAATTTTTGGACTTCTT
+>B7_593:1:85:361:418
+GAAGATACATTGCAAGACAGACTTCATCAAGATATG
+>B7_593:2:104:744:280
+CATATGGAAAGGTTGTTGGGATTTTTTTAATGATTC
+>B7_593:2:104:744:280
+TGGGCTGTAATGATGCCCCTTGTCCATCACCCGGTC
+>B7_593:2:125:875:553
+AACAGTAAAATAAAACAAAGGAGGTCATCATACAAT
+>B7_593:2:125:875:553
+TAATTGGTACAATGTACAATATTCTGATGATGGTTA
+>B7_593:2:128:555:941
+AACCAAAAGAGAGAAGGAGTAGTTATACACATATCA
+>B7_593:2:133:460:542
+CCTATAAGCCGTTCTATTTGTAATGAAAACTATATT
+>B7_593:2:133:460:542
+TTACCAAATGTGTTTATTACCAGAGGGATGGAGGGA
+>B7_593:2:259:467:737
+CTATGTACTTATCATGACTCTATCCCAAATTCCCAA
+>B7_593:2:259:467:737
+TCACAGTTTCTGCCCCCAGCATGGTTGTACTGGGCA
+>B7_593:2:270:430:269
+AAAGAGGCTCAAAGAATGCCAGGAAGATACATTGCA
+>B7_593:2:270:430:269
+CCAGAAGAGATTGGATCTAATTTTTGGACTTCTTAA
+>B7_593:2:273:348:37
+AGAAATGCGCAAAAGAATTGTAAAAGTCAAAATTAA
+>B7_593:2:273:348:37
+GAATAACAATGGGCTTCTCAGCAGAAACCTTACACG
+>B7_593:2:313:531:169
+GAAAGAGGTTCAGAACTTGAAGACAAGTCTCTTATG
+>B7_593:2:313:531:169
+GTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAATT
+>B7_593:2:43:239:977
+TGAACTTCTGTAATTGAAAAATTCATTTAAGAAATT
+>B7_593:2:68:140:542
+AAAAACAAAAACTATGCTAAGTATTGGTAAAGATGT
+>B7_593:2:68:140:542
+GGGATGAGAAATTACCTAATTGGTACAATGTACAAT
+>B7_593:2:68:692:347
+TATCAATTTGGTGTTCTGTGTAAAGTCTCATGGAGC
+>B7_593:2:68:692:347
+TGTCTTGATTTACTTGTTGTTGGTTTTCTGTTTCTT
+>B7_593:2:81:435:410
+AGACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGT
+>B7_593:2:81:435:410
+ATTAATTGAGACTACAGAGCAACTAGGTAAAAAATT
+>B7_593:3:102:856:670
+AAACCTCATATATCAATATTAACTTTGAATAAAAAG
+>B7_593:3:102:856:670
+AGAGAAGGAGTAGCTATACTTATATCAGATAAAGCA
+>B7_593:3:115:649:259
+ATTAATTGAGAATACAGAGCAACTAGGTAAAAAATT
+>B7_593:3:115:649:259
+GGGAAATAAAGTCAAGTCTTTCCTGACAAGCAAATG
+>B7_593:3:148:437:481
+CGCTTCTAGCCATTTCTTTTGGCATTTGCCTTCAGA
+>B7_593:3:148:437:481
+GTACTGGGCAATACATGAGATTATTAGGAAATGCTT
+>B7_593:3:180:89:582
+ATGCTAAGATAATTCATCATCACTAAACCAGTCCTA
+>B7_593:3:180:89:582
+TAAAAAATTAACATTACAACAGGAACAAAACCTCAT
+>B7_593:3:194:168:684
+AAAACAGTCTAGATGAGAGAGAACTTCCCTGGAGGT
+>B7_593:3:194:168:684
+CTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGGTT
+>B7_593:3:196:11:27
+AAGACCCAGTTAGCCGTCCGTGTCCTCCCATCTGGC
+>B7_593:3:196:11:27
+CTATGTTTCTTATCTGCNCATTACTACCCTGCAATT
+>B7_593:3:303:131:673
+ACAAGAAACTCATTAATAAAGACATGAGTTCAGGTA
+>B7_593:3:303:131:673
+ATAAAAAGATCAATTCAGCAAGAAGATATAACCATC
+>B7_593:3:310:193:629
+CATACAATGATAAAAAGATCAATTCAGCAAGAAGAT
+>B7_593:3:310:193:629
+TACACTAAAAGCCCATACTTTACTGCTACTCAATAT
+>B7_593:4:104:153:698
+CCAGATACCATCCCTGTCTTACTTCCAGCTCCCCAG
+>B7_593:4:104:153:698
+CTAATGCGCTCCACGCCCAAGCCCTTCTCACAGTTT
+>B7_593:4:106:316:452
+CTATATTTATGCTATTCAGTTCTAAATATAGAAATT
+>B7_593:4:142:63:937
+GAAAAGGTCTTTCAAAAGGTGATGTGTGTTCTCATC
+>B7_593:4:142:63:937
+TTCTTTTGGCATTTGCCTTCAGACCCTACACGAATG
+>B7_593:4:28:781:723
+AATACCTCTACATGGCTGATTATGAAAACAATGTTC
+>B7_593:4:28:781:723
+ACTGAACTTCCACGTCTCATCTAGGGGAACAGGGAG
+>B7_593:4:29:794:282
+CACATTAATACTATGTTTCTTATCTGCACATTACTA
+>B7_593:4:29:794:282
+TAAAGTCTCAGGGAGCCGTCCGTGTCCTCCCATCTG
+>B7_593:4:30:117:411
+TCTTGACACCCAACTAATATTTGTCTGAGCAAAACA
+>B7_593:4:30:117:411
+TCTTGTAATCTCTCTCCTTTTTGCTGCATCCCTGTC
+>B7_593:4:30:812:345
+TATGTTTCTTATCTGCACATTACTACCCTGCAATTA
+>B7_593:4:30:812:345
+TCATCTAGGGGAACAGGGAGGTGCACTAATGCGCTC
+>B7_593:4:315:201:673
+AAGATATGTAGTCATCAGACTATCTAAAGTCAACAT
+>B7_593:4:315:201:673
+TTGGACTTATTAAAGAAAAAAAAACCTGTCAAACAC
+>B7_593:5:171:343:758
+ACTAATATTTGTCTGAGCAAAACAGTCTAGATGAGA
+>B7_593:5:171:343:758
+GCTGCATCCCTGTCTTCCTCTGTCTTGATTTACTTG
+>B7_593:5:267:71:603
+TTGGCCATCACCCAGTCCCTGCCCCATCTCTTGTAA
+>B7_593:5:267:71:603
+TTTAATGATTCCTCAATGTTAAAATGTCTATTTTTG
+>B7_593:5:299:743:762
+AAAATTAAAATTTAACAAAAGTAAATAAAACACATA
+>B7_593:5:299:743:762
+CAAGACTACCCAGATTCATAAAACAAATACTACTAG
+>B7_593:5:30:599:589
+CTACGCAAACAGAAACCAAATGAGAGAAGGAGCAGC
+>B7_593:5:30:599:589
+TCATAAAACAAATACTACTAGACCTAAGAGGGATGA
+>B7_593:6:118:121:760
+GAGATTTAGACATCTAAATGAAAGAGGCTCAAAGAA
+>B7_593:6:119:428:415
+GCTATACTTATATCAGATAAAGCACACTTTAAATCA
+>B7_593:6:119:428:415
+TAAATTCCCCCACTTAAGAGATATAGATTGGCAGAA
+>B7_593:6:185:96:948
+CTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGATA
+>B7_593:6:185:96:948
+TTTAAAAATGAACAGAGCTTTCAAGAAGTATGAGAT
+>B7_593:6:38:332:54
+CCATCATAAATACACACAAAAGTACAAAACTCACAG
+>B7_593:6:38:332:54
+TGGCAGAACAGATTTAAAAACATGAACTAACTATAT
+>B7_593:6:61:628:681
+CAGAGGGAAAGCTTTCAACGCTTCTAGCCATTTCTT
+>B7_593:6:61:628:681
+GCCATCACAATGAACAACAGGAAGAAAAGGTCTTTC
+>B7_593:7:15:244:876
+AAAAGTAAACTCTCAAATATTGCTAGTGTGAGTATA
+>B7_593:7:15:244:876
+GTACAATATTCTGATGATGGTTACACTAAAAGCCCA
+>B7_593:7:189:530:40
+AAAAGAATTGTAAAAGTCAAAATTAAAGTTCAATAC
+>B7_593:7:189:530:40
+CCCCACTTAAGAGATATAGATTGGCAGAACAGATTT
+>B7_593:7:256:354:173
+CATTAATAAAGACATGAGTTCAGGTAAAGGGGTGGA
+>B7_593:7:256:354:173
+TCAATTCAGCAAGAAGATATAACCATCCTACTAAAT
+>B7_593:7:283:186:707
+AATACATATGCACCTAACACAAGACTACCCAGATTC
+>B7_593:7:283:186:707
+CGCTTGTACTTCTAAATCTATAACAAAATTAAAATT
+>B7_593:7:307:481:625
+AAAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAG
+>B7_593:7:307:481:625
+TCAACATGAAGGAAAAAAATTCTAAAATCAGCAAGA
+>B7_593:7:67:302:762
+GTAATCTCTCTCCTTTTTGCTGCATCCCTGTCTTCC
+>B7_593:7:67:302:762
+TTACGTCCTATCTTCTTCTTAGGGAAGAACAGCTTA
+>B7_593:7:6:585:132
+GCCCCTTGACCACCACCCAGTCCCTGCCCCATCTCT
+>B7_593:7:6:585:132
+TGTACTTATCATGTTTCTTTCCTAATTTTTCAATTA
+>B7_593:7:87:89:696
+TGGTTCTCTTGAAAGCTTGGGCTGTAATGATGCCCC
+>B7_593:7:87:89:696
+TTCCTATGGAAAGGTTGTTGGGAGATTTTTAATGAT
+>B7_595:1:209:345:87
+AAACAGTCTAGATGAGAGAGAACTTCCCTGGAGGT
+>B7_595:1:209:345:87
+TTACTTGTTGTTGGTTTTCTGTTTCTTTTTTTGAT
+>B7_595:1:209:653:400
+AGGAGCATTTTGTCAGTTACCAAATGTGTTTATTA
+>B7_595:1:209:653:400
+CTAGAGTCCCATTTGGAGCCCCTCTAAGCCGTTCT
+>B7_595:1:252:19:955
+AGCCAGTTCTTTTGGCATTTGCCTTCAGACCCTCC
+>B7_595:1:252:19:955
+TGAACAAAAGGAAGAAAAGGTCTTTCAAAAGGTGA
+>B7_595:1:81:1000:375
+ACCCTGAGAGATTCTGCAGCCCAGCTCCAGATTGC
+>B7_595:1:81:1000:375
+NATGTCAGGGAAGGAGCATTTTGTCAGTTACCAAA
+>B7_595:2:178:77:424
+CTACCCTGCAATTAATATAATTGTGTCCATGTACA
+>B7_595:2:178:77:424
+TGCGCTCCACGCCCAAGCCCTTCTCACAGTTTCTG
+>B7_595:2:251:121:479
+GGCTGCAACTGTGAGCCATCACAATGAACAACAGG
+>B7_595:2:251:121:479
+GGGAAAGCTTTCAACGCTTCTAGCCATTTCTTTTG
+>B7_595:2:29:729:70
+AAACAGAAACCAAATGAGAGAAGGAGTAGCTATAC
+>B7_595:2:29:729:70
+ANTATTANCTTTGANNAAAAAGGGATTAAATTCCC
+>B7_595:3:229:543:583
+ATAACCATCCTACTAAATACATATGCACCTAACAC
+>B7_595:3:229:543:583
+TCAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAA
+>B7_595:3:297:637:86
+CAATGTTCCCCAGATACCATCCCTGTCTTACTTCC
+>B7_595:3:297:637:86
+TCTCAGCTAGGGGAACAGGGAGGTGCACTAATGCG
+>B7_595:3:57:735:151
+CTATCCCAAATTCCCAATTACGTCCTATCTTCTTC
+>B7_595:3:57:735:151
+TAAACTCTCACCTTATTGCTGCATCCCTGTCTTCC
+>B7_595:3:85:964:950
+AACAGATTTAAAAACATGAACTAACTATATGCTGT
+>B7_595:3:85:964:950
+GAGGTCATCATACAATGATAAAAAGATCAATTCAG
+>B7_595:4:12:402:843
+AGGGAGGTGCACTAATGCGCTCCACGCCCAAGCCC
+>B7_595:4:12:402:843
+ATATAATTGTGTCCATGTACACACGCTGTCCTATG
+>B7_595:4:319:250:718
+AAAAGAAAAAGTGAGAAGTTTGGAAAAACTATTTG
+>B7_595:4:319:250:718
+AGTTGAAAGCTCTAACAATAGACTAAACCAAGCAG
+>B7_595:4:58:703:72
+GTACACACGCTGTCCTATGTACTTATCATGACTCT
+>B7_595:4:58:703:72
+TCCACGCCCAAGCCCTTCTCACAGTTTCTGCCCCC
+>B7_595:4:84:802:737
+CATAGACCCCCTTGCAACAACCTTGAGAACCCCAG
+>B7_595:4:84:802:737
+CTTAACCTCTGGTGACTGCCAGAGCTGCTGGCAAG
+>B7_595:5:184:912:258
+ATTCTAAAATCAGCAAGAGAAAAGCATACAGTCAT
+>B7_595:5:184:912:258
+GTTATGCCCTGCTAAACTAAGCATCATAAATGAAG
+>B7_595:5:36:649:554
+AAGAGATTGGATCTAATTTTTGGACTTCTTAAAGA
+>B7_595:5:36:649:554
+CAGGAAGATACATTGCAAGACAGACTTCATCAAGA
+>B7_595:5:84:91:614
+GAACCACACATTAATACTATGTTTCTTATCTGCAC
+>B7_595:5:84:91:614
+TTTCCCATCATGAAGCACTGATCTTCCACGTCTCA
+>B7_595:6:119:730:190
+AGTCTCTTATGAATTAACCCAGTCAGACAAAAATA
+>B7_595:6:119:730:190
+AGTCTTGCTAGAGATTTAGACATCTAAATGAAAGA
+>B7_595:6:137:811:130
+AAAATTTAACAAAAGTAAATAAAACACATAGCTAA
+>B7_595:6:137:811:130
+AGACTACCCAGATTCATAAAACAAATACTACTAGA
+>B7_595:6:290:270:557
+ACATTACTACCCTGCAATTAATATAATTGTGTCCA
+>B7_595:6:290:270:557
+GGAACAGGGAGGTGCACTAATGCGCTCCACGCCCA
+>B7_595:6:47:720:789
+CCCTTGGCCATCACCCGGTCCCGGCCCCTTCTCTT
+>B7_595:6:47:720:789
+TCCTCAATGTTAAAATGTCTATTTTTGTCTTGACA
+>B7_595:6:52:751:360
+AAAAACTATTTGAGGAAGTAATTGGGGAAAACCTC
+>B7_595:6:52:751:360
+AGAGAAAAGCATACAGTCATCTATAAAGGAAATCC
+>B7_595:6:99:557:427
+AACAAAATTAAAATTTAACAAAAGTAAATAAAACA
+>B7_595:6:99:557:427
+ATTCATAAAACAAATACTACTAGACCTAAGAGGGA
+>B7_595:7:123:610:472
+GTCTGAGCAAAACAGTCTAGATGAGAGAGAACTTC
+>B7_595:7:123:610:472
+TGCATCCCTGTCTTCCTCTGTCTTGATTTACTTGT
+>B7_595:7:149:123:265
+AAGAGGGATGAGAAATTACCTAATTGGTACAATGT
+>B7_595:7:149:123:265
+AGCAAAAACAAAAACTATGCTAAGTATTGGTAAAG
+>B7_595:7:166:203:416
+AATTACGTCCTATCTTCTTCTTAGGGAAGAACAGC
+>B7_595:7:166:203:416
+ATGAGATTATTAGGAAATGCTTTACTGTCATAACT
+>B7_595:7:188:802:71
+ATGCTATTCAGTTCTAAATATAGAAATTGAAACAG
+>B7_595:7:188:802:71
+TGCCCTCTTCTTCCAAAGATGAAACGCGTAACTGC
+>B7_595:7:190:481:295
+GAAGAACAGCTTAGGTATCAATTTGGTGTTCTGTG
+>B7_595:7:190:481:295
+TCATAACTATGAAGAGACTATTGCCAGATGAACCA
+>B7_595:7:242:4:593
+ATATACACACGCTGTCCTATGTACTTATCATGACT
+>B7_595:7:242:4:593
+TCCACGCCCAAGCCCTTCTCACAGTTTCTGCCCCC
+>B7_595:8:26:242:35
+ATAAAACAAAGGAGGTCATCATACAATGATAAAAA
+>B7_595:8:26:242:35
+ATATTTTGATGATGGTTACACTAAAAGCCCATACT
+>B7_597:2:100:563:301
+GAACTTCTGTAATTGAAAAATTCATTTAAGAAATT
+>B7_597:2:132:493:921
+ACGGGGTTGCCAGCACAGGGGCTTAACCTCTGGTG
+>B7_597:2:132:493:921
+GTTCTCAAGGTTGTTGCAAGGGGGTCTATGTGAAC
+>B7_597:2:165:431:857
+GAACTTTGATGCCCTCTTCTTCCAAAGATGAAACG
+>B7_597:2:165:431:857
+TTGGGGTCTGACAGGCTGCAACTGTGAGCCATCAC
+>B7_597:2:168:829:88
+ACAGACTTCATCAAGATATGTAGTCATCAGACTAT
+>B7_597:2:168:829:88
+TAACTGAACCTATGAGTCACAGGTATTCCTGAGGA
+>B7_597:2:42:28:552
+AAGAAGATATAACCATCCTACTAAATACATATGCA
+>B7_597:2:42:28:552
+ACTCAATATATCCATGTAACAAATCTGCGCTTGTA
+>B7_597:3:10:394:392
+TCATCAAGATATGTAGTCATCAGACTATCTAAATT
+>B7_597:3:10:394:392
+TTGGACTTCTTAAAGAAAAAAAAACCTGTCAAACA
+>B7_597:3:115:646:430
+CACAGGTTTTATAAAACAATTAATTGAGACTACAG
+>B7_597:3:115:646:430
+GTTATGCCCTGCTAAACTTAGCATCATAAATGAAG
+>B7_597:3:133:707:886
+ACCTAATAAATACATATGCACCTAACACAAGACTA
+>B7_597:3:133:707:886
+AGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAAC
+>B7_597:3:157:361:309
+CCTGCTAAACTAAGCATCATAAATGAAGGGGAAAT
+>B7_597:3:157:361:309
+TACACACAAAAGTACAAAACTCACAGGTTTTATAA
+>B7_597:3:39:966:551
+ACATTAATACTATGTTTCTTATCTGCACATTACTA
+>B7_597:3:39:966:551
+AGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCGTC
+>B7_597:3:46:981:766
+TCTTTTGGCATTTGCCTTCAGACCCTACACGAATG
+>B7_597:3:46:981:766
+TGCCCCCAGCATGGTTGTACTGGGCAATACATGAG
+>B7_597:3:53:616:842
+CTTATCATGACTCTATCCCAAATTCCCACTTACGT
+>B7_597:3:53:616:842
+TCACCCAGTCCCTGCCCCATCTCTTGTAATCTCTC
+>B7_597:3:67:620:344
+AGCTTTCAACGCTTCTAGCCATTTCTTTTGGCATT
+>B7_597:3:67:620:344
+CCCCCGCCCAAGCCCTTCTCACAGTTTCTGCCCCC
+>B7_597:3:73:273:488
+AAAACAGTCTAGATGAGAGAGAACTTCCCTGGAGG
+>B7_597:3:73:273:488
+CTCTCTCCTTTTTGCTGCATCCCTGTCTTCCTCTG
+>B7_597:4:138:211:582
+CATTAATAAAGACATGAGTTCAGGTAAAGGGGTGG
+>B7_597:4:138:211:582
+TGAGACTACAGAGCAAATAGGTAAAAAATTAACAT
+>B7_597:4:144:492:61
+AAATACTCACCATCATAAATACACACAAAAGTACA
+>B7_597:4:144:492:61
+AACCTGTCAAACACGAATGTTATGCCCTGCTAAAC
+>B7_597:4:146:961:63
+TACTTATATCAGATAAAGCACACTTTAAATCAACA
+>B7_597:4:146:961:63
+TGAATAAAAAGGGCTTAAATTCCCCCACTTAAGGG
+>B7_597:4:38:999:463
+GCGGAAACCTTACAAGCCAGAAGAGATTGGATCTA
+>B7_597:4:38:999:463
+TAGACATCTAAATGAAAGNNGCNNNAAGAATGCCA
+>B7_597:5:125:957:753
+TTTTTTTTTTTTCTCTCCTCTTTTTTTTTTTTTTT
+>B7_597:5:160:434:853
+ATATAATTGTGTCCATGTACACACGCTGTCCTATG
+>B7_597:5:160:434:853
+GCTTGGGCTGTAATGATGCCCCTTGGCCATCACCC
+>B7_597:5:58:684:520
+AGACAGACTTCATCAAGATATGTAGTCATCAGACT
+>B7_597:5:58:684:520
+ATTTTTGGACTTCTTAAAGAAAAAAAAACCTGTCA
+>B7_597:5:6:882:784
+CATGGCTGATTATGAAAACAATGTTCCCCAGATAC
+>B7_597:5:6:882:784
+CTGGATTCTGGGAAATTCTTCATCCTGGACCCTGA
+>B7_597:5:98:995:929
+GTCCTATAAGAAATGCTCAAAAGAATTGTAAAAGT
+>B7_597:5:98:995:929
+TATAACAATATTAACTTTGAATAAAAAGGGATTAA
+>B7_597:6:106:595:322
+GAGGAAGTAATTGGGGAAAACCTCTTTAGTCTTGC
+>B7_597:6:106:595:322
+GCAAGAGAAAAGCATACAGTCATCTATAAAGGAAA
+>B7_597:6:193:661:771
+AAACTCATTAATAAAGACATGAGTTCAGGTAAAGG
+>B7_597:6:193:661:771
+GCAAGAAGATATAACCATCCTACTAAATACATATG
+>B7_597:6:20:592:496
+CTCAAAGAATGCCAGGAAGATACATTGCAAGACAG
+>B7_597:6:20:592:496
+TCTCAGCGGAAACCTTACAAGCCAGAAGAGATTGG
+>B7_597:6:29:249:878
+ATTGTGTCCATGTACACACGCTGTCCTATGTACTT
+>B7_597:6:29:249:878
+TCTCACAGTTTCTGCCCCCAGCATGGTTGTACTGG
+>B7_597:6:73:420:812
+CTAAGTATTGGTAAAGATGTGGGGAAAAAAGTAAA
+>B7_597:6:73:420:812
+CTAATTGGTACAATGTACAATATTCTGATGATGGT
+>B7_597:7:103:731:697
+AATTAACCCAGTCAGACAAAAATAAAGAAAAAAGA
+>B7_597:7:103:731:697
+CTTGCTAGAGATTTAGACATCTAAATGAAAGAGGC
+>B7_597:7:113:408:211
+AAACTCATTAATAAAGACATGAGTTCAGGTAAAGG
+>B7_597:7:113:408:211
+GAGACTACAGAGCAACTAGGTAAAAAATTAACATT
+>B7_597:7:31:948:254
+CCTCTACATGGCTGATTATGAAAACAATGTTCCCC
+>B7_597:7:31:948:254
+TGAAGCACTGAACTTCCACGTCTCATCTAGGGGAA
+>B7_597:7:41:34:211
+CACCTAACACAAGACTACCCAGATTCATAAAACAA
+>B7_597:7:41:34:211
+GTAAAGGGGTGGAAAAAGATGTTCTACGCAAACAG
+>B7_597:7:5:753:806
+AAAATATAGTTGAAAGCTCTAACAATAGACTAAAC
+>B7_597:7:5:753:806
+ATGTAAAGTAACTGAACCTATGAGTCACAGGTATT
+>B7_597:7:94:273:165
+AGAAAGAAGATATAACCATCCTACTAAATACATAT
+>B7_597:7:94:273:165
+TTACAAGAAACTCATTAATAAAGACATGAGTTCAG
+>B7_597:8:147:360:141
+TGCGCTTGTACTTCTAAATCTATAACAAAATTAAA
+>B7_597:8:147:360:141
+TTTGGTAATTTAGTTTTTTTTTTTTCTTTTCTCTT
+>B7_597:8:186:850:838
+GGAAGAGGGACGCTGAAGAACTTTGATGCCCTCTT
+>B7_597:8:186:850:838
+GTTCTATTTGTAATGAAAACTATATTTATGCTATT
+>B7_597:8:35:118:589
+TCATAAAACAAATACTACTAGACCTAAGAGGGATG
+>B7_597:8:35:118:589
+TGTTCTACGCAAACAGAAACCAAATGAGAGAAGGA
+>B7_597:8:48:805:860
+AAACCTGTCAAACACGAATGTTATGCCCTGCTAAA
+>B7_597:8:48:805:860
+AAAGTACAAAACTCACAGGTTTTATAAAACAATTA
+>B7_610:1:12:88:200
+ACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTT
+>B7_610:1:12:88:200
+GAAGATATAACCATCCTACTAAATACATATGCACC
+>B7_610:1:139:152:856
+AGAGGGACGCTGAAGAACTTTGATGCCCTCTTCTT
+>B7_610:1:139:152:856
+CTAAGCCGTTCTATTTGTAATGAAAACTATATTTA
+>B7_610:1:37:652:403
+CCCCTCTAAGCCGTTCTATTTGTAATGAAAACTAT
+>B7_610:1:37:652:403
+TTTTGTCAGTTACCAAATGTGTTTATTACCAGAGG
+>B7_610:2:189:831:878
+AGAAAAAAAAACCTGTCAAACACGAATGTTATGCC
+>B7_610:2:189:831:878
+AGGAAGATACATTGCAAGACAGACTTCATCAAGAT
+>B7_610:2:194:688:289
+TCAGACCCTACACGAATGCGTCTCTACCACAGGGG
+>B7_610:2:194:688:289
+TGTGTGTTCTCATCAACCTCATACACACACATGGT
+>B7_610:2:6:529:366
+CATACAATGATAAAAAGATCAATTCAGCAAGAAGA
+>B7_610:2:6:529:366
+GCTACTCAATATATCCATGTAACAAATCTGCGCTT
+>B7_610:2:75:887:149
+TACAACAGGAACAAAACCTCATATATCAATATTAA
+>B7_610:2:75:887:149
+TACGCAAACAGAAACCAAATGAGAGAAGGAGTAGC
+>B7_610:3:102:825:507
+TGCTGCATCCCTGTCTTCCTCTGTCTTGATTTACT
+>B7_610:3:102:825:507
+TTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAAC
+>B7_610:3:120:63:653
+AACAACAGGAAGAAAAGGTCTTTCAAAAGGTGATG
+>B7_610:3:120:63:653
+TCTTCTTCCAAAGATGAAACGCGTAACTGCGCTCT
+>B7_610:3:137:895:681
+CTTCCTATGGAAAGGTTGTTGGGAGATTTTTAATG
+>B7_610:3:137:895:681
+GCCTCGTCCACACTGGTTCTCTTGAAAGCTTGGGC
+>B7_610:3:148:340:479
+TCTGTGTAAAGTCTCAGGGAGCCGTCCGTGTCCTC
+>B7_610:3:148:340:479
+TTGTTGTTGGTTTTCTGTTTCTTTGTTTGATTTGG
+>B7_610:3:182:23:585
+AGAACAGCTTAGGTATCAATTTGGTGTTCTGTGTA
+>B7_610:3:182:23:585
+ATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTGT
+>B7_610:3:5:863:302
+ACGTCTCATCTAGGGGAACAGGGAGGTGCACTAAT
+>B7_610:3:5:863:302
+TGAAAACAATGTTCCCCAGATACCATCCCTGTCTT
+>B7_610:3:82:998:566
+ATCTGGCCTCGTCCACACTGGTTCTCTTGAAAGCT
+>B7_610:3:82:998:566
+GCACATTACTACCCTGCAATTAATATAATTGTGTC
+>B7_610:3:84:101:328
+AATGGACCTGTGATATCTGGATTCTGGGAAATTCT
+>B7_610:3:84:101:328
+TATAATACCTCTACATGGCTGATTATGAAAACAAT
+>B7_610:3:85:219:371
+GACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGT
+>B7_610:3:85:219:371
+TAACATTACAACAGGAACAAAACCTCATATATCAA
+>B7_610:4:139:989:144
+ACTAAAAAAGCAAAAACAAAAACTATGCTAAGTAT
+>B7_610:4:139:989:144
+ACTACTAGACCTAAGAGGGATGAGAAATTACCTAA
+>B7_610:4:15:805:420
+ATGAAGAGACTATTCACATGTGAACCACACATTTA
+>B7_610:4:15:805:420
+GAACAGTTTAGGTATCAATTTGGTGTTCTTTGTAA
+>B7_610:4:198:59:675
+AATCTGCGCTTGTACTTCTAAATCTATAACAAAAT
+>B7_610:4:198:59:675
+ACTAAATACATATGCACCTAACACAAGACTATCCT
+>B7_610:4:67:317:249
+CTACATGGCTGATTATGAAATCTATGTTCCCCATA
+>B7_610:4:67:317:249
+TTCCCATCATGACGCACCGAACTTCCACGTCTCAT
+>B7_610:5:102:915:87
+AACAATAGACTAAACCAAGCAGAAGAAAGAGGTTC
+>B7_610:5:102:915:87
+CACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAA
+>B7_610:5:120:596:847
+AACCTATGAGTCACAGGTATTCCTGAGGAAAAAGA
+>B7_610:5:120:596:847
+TCAACATGAAGGAAAAAAATTCTAAAATCAGCAAG
+>B7_610:5:136:260:254
+AAATTAACATTACAACAGGAACAAAACCTCATATA
+>B7_610:5:136:260:254
+GTGGAAAAAGATGTTCTACGCAAACAGAAACCAAA
+>B7_610:5:147:68:353
+AACAACCTTGAGAACCCCAGGGAATTTGTCAATGT
+>B7_610:5:147:68:353
+CCTTTGATATCTGGATTCTGGGAAATTCTTCATCC
+>B7_610:5:51:904:391
+ACCTATGAGTCACAGGTATTCCTGAGGAAAAAGAA
+>B7_610:5:51:904:391
+TATCTAAAGTCAACATGAAGGAAAAAAATTCTAAA
+>B7_610:5:7:761:623
+CCGGCATGGTTGTACTGGGCAATACATGAGATTAT
+>B7_610:5:7:761:623
+CTTTCAACGCTTCTAGCCATTTCTTTTGGCATTTG
+>B7_610:6:107:252:533
+AGAGAAGGAGTAGCTATACTTATATCAGATAAAGC
+>B7_610:6:107:252:533
+CAAGACTACCCAGATTCATAAAACAAATACTACTA
+>B7_610:6:111:379:700
+ACACTAAAAGCCCATACTTTACTGCTACTCAATAT
+>B7_610:6:111:379:700
+CGCACTGGCAATATTTGTGTGTTTACTTTTTTGCA
+>B7_610:6:143:620:158
+ACTTTACTGCTACTCAATATATCCATGTAACAAAT
+>B7_610:6:143:620:158
+CAATGATAAAAAGATCAATTCAGCAAGAAGATATA
+>B7_610:6:148:776:486
+AACTGTGAGCCATCACAATGAACAACAGGAAGAAA
+>B7_610:6:148:776:486
+AGCTTTCAACGCTTCTAGCCATTTCTTTTGGCATT
+>B7_610:7:116:157:612
+GTAAAAGTCAAAATTAAAGTTCAATACTCACCATC
+>B7_610:7:116:157:612
+TTAAGAGATATAGATTGGCAGTACAGATTTAAAAA
+>B7_610:7:117:857:942
+AGAGAGAACTTCCCTGGAGGTCTGATGGCGTTTCT
+>B7_610:7:117:857:942
+GGTTTTCTGTTTCTTTGTTTGATTTGGTGGAAGAC
+>B7_610:7:158:943:467
+AAAGCTCTAACAATAGACTAAACCAAGCAGAAGAA
+>B7_610:7:158:943:467
+AGGTATTCCTGAGGAAAAAGAAAAAGTGAGAAGTT
+>B7_610:7:15:696:693
+TCAAACACGAATGTTAATCCCTGCTAAACTAATCA
+>B7_610:7:15:696:693
+TCTAAAGTCAACATGAAGGAAAAAAATTCTAAAAT
+>B7_610:7:177:469:800
+AAAATCAGCAAGAGAAAAGCATACAGTCATCTATA
+>B7_610:7:177:469:800
+TTGGAAAAACTATTTGAGGAAGTAATTGGGGAAAA
+>B7_610:7:26:749:174
+CAGATTCATAAAACAAATACTACTAGACCTAAGAG
+>B7_610:7:26:749:174
+TAACAAAAGTAAATAAAACACATAGCTAAAACTAA
+>B7_610:7:34:144:868
+AATTACCTAATTGGTACAATGTACAATATTCTGAT
+>B7_610:7:34:144:868
+AGCTAAGGAATGGGAAAGGTGTGGGGAAAAAAGTA
+>B7_610:7:35:378:681
+GCGTCTCTACCACAGGGGGCTGCGCGGTTTCCCAT
+>B7_610:7:35:378:681
+GTGTGTTCTCATCAACCTCATACACACACATGGTT
+>B7_610:8:163:757:432
+CCAGATGAACCACACATTAATACTATGTTTCTCAT
+>B7_610:8:163:757:432
+GGTTTCCCATCATGAAGCACTGAACTTCCACGTCT
+>B7_610:8:68:570:705
+AACAGATTTAAAAACATGAACTAACTATATGCTGT
+>B7_610:8:68:570:705
+CATCATACAATGATAAAAAGATCAATTCAGCAAGA
+>B7_610:8:95:426:791
+CCAAATGTGTTTATTACCAGAGGGATGGAGGGAAG
+>B7_610:8:95:426:791
+GNTCCAGATTGCTTGTGGTCTGACAGGCTGCAACT
+>EAS112_32:7:113:809:364
+GATGCCCTCTTCTTCCAAAGATGAAACGCGTAACT
+>EAS112_32:7:113:809:364
+TATAGAAATTGAAACAGCTGTGTTTAGTGCCTTTG
+>EAS112_32:7:135:401:735
+AATTGTAAAAGTCAAAATTAAAGTTCAATACTCAC
+>EAS112_32:7:135:401:735
+CCTTACAAGCCAGAAGAGATTGGATCTAATTTTTG
+>EAS112_32:7:168:117:441
+TCATAACTATGAAGAGACTATTGCCAGATGAACCA
+>EAS112_32:7:168:117:441
+TCTTCTTAGGGAAGAACAGCTTAGGTATCAATTTG
+>EAS112_32:7:272:328:400
+CAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAA
+>EAS112_32:7:272:328:400
+CTAAATACATATGCACCTAACACAAGACTACCCAG
+>EAS112_32:7:322:391:742
+ATCAATATTAACTTTGAATAAAAAGGGATTAAATT
+>EAS112_32:7:322:391:742
+CATCACTAAACCAGTCCTATAAGAAATGCTCAAAA
+>EAS112_32:7:42:804:114
+ATACTTTACTGCTACTCAATATATCCATGTAACAA
+>EAS112_32:7:42:804:114
+TCAAATATTGCTAGTGGGAGTATAAATTGTTTTCC
+>EAS112_32:8:88:90:59
+ATAATACCTCTACATGTCTGATTATGAAAACAATG
+>EAS112_32:8:88:90:59
+TGCACCTCCCTGTTCACCTAGATGCTAGGAGGACA
+>EAS112_32:8:89:254:332
+AAACCTCTTTAGTCTTGCTAGAGATTTAGACATCT
+>EAS112_32:8:89:254:332
+GAATTAACCCAGTCAGACAAAAATAAAGAAAAAAG
+>EAS112_34:4:12:273:89
+AGTCTTTCCTGACAAGCAAATGCTAAGATAATTCA
+>EAS112_34:4:12:273:89
+CCATCAGAATAACAATGGGCTTCTCAGCGGAAACC
+>EAS112_34:4:17:989:186
+TTAGTCTTGCTAGAGATTTAGACATCTAAATGAAA
+>EAS112_34:4:17:989:186
+TTATGAATTAACCCAGTCAGACAAAAATAAAGAAA
+>EAS112_34:4:22:206:150
+AAAAAAGAGCAACTAGGTAAAAAATTAACATTACA
+>EAS112_34:4:22:206:150
+GAAATAAAGTCAAGTCTTTCCTGACAAGCAAATGC
+>EAS112_34:4:74:570:695
+CACACTGGTTCTCTTGAAAGCTTGGGCTGTAATGA
+>EAS112_34:4:74:570:695
+TTGGGAGATTTTTAATGATTCCTCAATGTTAAAAT
+>EAS112_34:4:92:412:435
+AGATTCATAAAACAAATACTACTAGACCTAAGAGG
+>EAS112_34:4:92:412:435
+CTACGCAAACAGAAACCAANTGAGAGAAGGAGTAG
+>EAS112_34:6:127:153:861
+CTCAGGGAGCCGTCCGTGTCCTCCCATCTGGCCTC
+>EAS112_34:6:127:153:861
+TTTTCTGTTTCTTTGTTTGATTTGGTGGAAGACAT
+>EAS112_34:6:130:865:838
+AAAGCATACAGTCATCTATAAAGGAAATCCCATCA
+>EAS112_34:6:130:865:838
+AATGCTAAGATAATTCATCATCACTAAACCAGTCC
+>EAS112_34:6:145:144:263
+TTTCGTTTTTTTTTTTTTTTTTTCCCCTTTCTTTT
+>EAS112_34:6:43:47:279
+AAAACTATGCTAAGTATTGGTAAAGATGTGGGGAA
+>EAS112_34:6:43:47:279
+TAGACCTAAGAGGGATGAGAAGTTACCTAATTGGT
+>EAS112_34:6:71:85:629
+CCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGA
+>EAS112_34:6:71:85:629
+TTGACACCCAACTAATATTTGTCTGAGCAAAACAG
+>EAS112_34:6:75:615:555
+AAGTTTGGAAAAACTATTTGAGGAAGTAATTGGGG
+>EAS112_34:6:75:615:555
+TGAAGGAAAAAAATTCTAAAATCAGCAAGAGAAAA
+>EAS112_34:7:118:523:591
+GAGGGACGCTGAAGAACTTTGATGCCCTCTTCTTC
+>EAS112_34:7:118:523:591
+GGTCTGACAGGCTGCAACTGTGAGCCATCACAATG
+>EAS112_34:7:141:80:875
+AGCCGAGTCACGGGGTTGCCAGCACAGGGGCTTAA
+>EAS112_34:7:141:80:875
+AGCTGTGTTTAGTGCCTTTGTTCAACCCCCTTGCA
+>EAS112_34:7:142:457:584
+GGAAGAACAGCTTAGGTATCAATTTGGTGTTCTGT
+>EAS112_34:7:142:457:584
+TGAAGAGACTATTTCCAGATGAACCACACATTAAT
+>EAS112_34:7:71:62:254
+AAAAAAGTAAACTCTCAAATATTGCTAGTGGGAGT
+>EAS112_34:7:71:62:254
+GGTTACACTAAAAGCCCATACTTTCCTGCTACTCA
+>EAS112_34:7:86:498:373
+CACTAATGCGCTCCACGCCCAAGCCCTTCTCACAG
+>EAS112_34:7:86:498:373
+GATACCATCCCTGTCTTACTTCCAGCTCACCAGAG
+>EAS112_34:7:96:489:453
+AAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGAT
+>EAS112_34:7:96:489:453
+AGAAAAGCATACAGTCATCTATAAAGGAAATCCCA
+>EAS112_34:8:103:812:255
+ATGTTAAAATGTCTATTTTTGTCTTGACACCCAAC
+>EAS112_34:8:103:812:255
+TGATGCCCCTTGGCCATCACCCAGTCCCTGCCCCC
+>EAS112_34:8:174:557:872
+GGTAAAGATGTGGGGAAAAAAGTAAACTCTCAAAT
+>EAS112_34:8:174:557:872
+TGGTACAATGTACAATATTCTGATGATGGTTACAC
+>EAS112_34:8:179:13:782
+GACAGTCTACAACTGTGAGCCATCACAATGAACAA
+>EAS112_34:8:179:13:782
+TGATGCCCTCTTCTTCCAAAGATGAAACGCGTAAC
+>EAS112_34:8:30:816:90
+ACTACCCTGCAATTAATATAATTGTGTCCATGTAC
+>EAS112_34:8:30:816:90
+AGGGAGGTGCACTAATGCGCTCCACGCCCCAGCCC
+>EAS112_34:8:45:800:733
+ACATGAAGGAAAAAAATTCTAAAATCAGCAAGAGA
+>EAS112_34:8:45:800:733
+ATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTG
+>EAS112_34:8:4:841:339
+ACCTCATACACACACATGGTTTAGGGGTATAATAC
+>EAS112_34:8:4:841:339
+CTTCAGACCCTACACGAATGCGTCTCTACCACAGG
+>EAS114_26:1:113:367:659
+AGTCATCAGACTATCTAAAGTCAACATGAAGGAAA
+>EAS114_26:1:113:367:659
+CACAGGTATTCCTGAGGAAAAAGAAAAAGCGAGAA
+>EAS114_26:1:155:807:19
+AAAAGGGATTAAATTCCCCCACTTAAGAGATATAG
+>EAS114_26:1:155:807:19
+CAACAGTAAAATAAAACAAAGGAGGTCATCATACA
+>EAS114_26:1:171:527:247
+AACAAATGCTACTAGACCTAAGAGGGATGAGAAAT
+>EAS114_26:1:171:527:247
+AGAAGGAGTAGCTAGACTTATATCAGATAAAGCAC
+>EAS114_26:1:324:238:736
+AGACAGACTTCATCAAGATATGTAGTCATCAGACT
+>EAS114_26:1:324:238:736
+TCAAGAAGTATGAGATTATGTAAAGTAACTGAACC
+>EAS114_26:1:35:522:294
+GGTTCAGAACTTGAAGACAAGTCTCTTATGAATTA
+>EAS114_26:1:35:522:294
+TTGAGGAAGTAATTGGGGAAAACCTCTTTAGTCTT
+>EAS114_26:1:99:212:522
+ACGAATGCGTCTCTACCACAGGGGGCTGCGCGGTT
+>EAS114_26:1:99:212:522
+AGAGACTATTGCCAGATGAACCACACATTAATACT
+>EAS114_26:2:130:609:467
+AAATTCCCCCACTTAAGAGATATAGATTGGCAGAA
+>EAS114_26:2:130:609:467
+CAATACTCACCATCATAAATACACACAAAAGTACA
+>EAS114_26:2:214:950:32
+ACAAGAAACTCATTAATAAAGACATGAGTTCAGGT
+>EAS114_26:2:214:950:32
+AGAAGATATAACCATCCTACTAAATACATATGCAC
+>EAS114_26:2:237:497:165
+GAAATAAAGTCAAGTCTTTCCTGACAAGCAAATGC
+>EAS114_26:2:237:497:165
+TACAGTCATCTATAAAGGAAATCCCATCAGAATAA
+>EAS114_26:2:315:219:7
+GAAAGAGGCTCAAAGAATGCCAGGAAGATACATTG
+>EAS114_26:2:329:458:365
+GTGTTCTGTGTAAAGTCTCAGGGAGCCGTCCGTGT
+>EAS114_26:2:329:458:365
+TTGTTGTTGGTTTTCTGTTTCTTTGTTTGATTTGG
+>EAS114_26:2:73:513:102
+GTCTCATCTAGGGGAACAGGGAGGTGCACTAATGC
+>EAS114_26:2:73:513:102
+TGAAAACAATGTTCCCCAGATACCATCCCTGTCTT
+>EAS114_26:3:117:284:589
+ACAAAATATAGTTGAAAGCTCTAACAATAGACTAA
+>EAS114_26:3:117:284:589
+GAACCTATGAGTCACAGGTATTCCTGAGGAAAAAG
+>EAS114_26:3:284:261:124
+ACAGCTGTGTTTAGTGCCTTTGTTCAACCCCCTTG
+>EAS114_26:3:287:665:495
+GAATTGTAAAAGTCAAAATTAAAGTTCAATACTCA
+>EAS114_26:3:287:665:495
+TGGATCTAATTTTTGGACTTCTTAAAGAAAAAAAA
+>EAS114_26:4:100:238:596
+ATGAGAAATTACCTAATTGGTACAATGTACAATAT
+>EAS114_26:4:100:238:596
+CAAAAACTATTCTAAGTATTGGTAAAGATGTGGGG
+>EAS114_26:4:110:840:431
+CTGTCAAACACGAATGTTATGCCCTGCTAAACTAA
+>EAS114_26:4:110:840:431
+GTAGTCATCAGACTATCTAAAGTCAACATGAAGGA
+>EAS114_26:4:123:1001:580
+AGGTTTTATAAAACAATTAATTGAGACTACAGAGC
+>EAS114_26:4:123:1001:580
+GGGAANTAAAGTCAAGTCTTTCCTGACAAGCAAAT
+>EAS114_26:4:253:285:104
+CTCTCATTCACTCCAGCTCCCTGTCACCCAATGGA
+>EAS114_26:4:253:285:104
+GTGATGTGTGTTCTCATCAACCTCATACACACACA
+>EAS114_26:4:306:388:342
+CCGTCCGTGTCCTCCCATCTGGCCTCGTCCACACT
+>EAS114_26:4:306:388:342
+GGGGAAGACATAATCCCACGCTTCCTATGGAAAGG
+>EAS114_26:4:40:352:151
+ATTACGTCCTATCTTCTTCTTAGGGAAGAACAGCT
+>EAS114_26:4:40:352:151
+TTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTT
+>EAS114_26:5:139:331:63
+GACACCCAACTAATATTTGTCTGAGCAAAACAGTC
+>EAS114_26:5:139:331:63
+TTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTT
+>EAS114_26:5:228:189:826
+AAGCCCTTCTCACAGTTTCTGCCCCCCGCATGGTT
+>EAS114_26:5:228:189:826
+ATCATGACTCTATCCCAAATTCCCAATTACGTCCT
+>EAS114_26:5:238:31:968
+ACACCATCCCTGTCTTACTTCCAGCTCCCCAGAGG
+>EAS114_26:5:238:31:968
+ATTCTGCAGCCCAGCTCCAGATTGCTTGTGGTCTG
+>EAS114_26:5:43:114:617
+AATCATAAATACACACAAAAGTACAAAACTCACAG
+>EAS114_26:5:43:114:617
+AATGTTATGCCCTGCTAAACTAAGCATCATAAATG
+>EAS114_26:6:129:694:359
+CCCTGAGAGATTCTGCAGCCCAGATCCAGATTGCT
+>EAS114_26:6:129:694:359
+TGTCAGTTACCAAATGTGTTTATTACCCGAGGGAT
+>EAS114_26:6:140:253:322
+AATTTTTGGACTTCTTAAAGAAAAAAAAACCTGTC
+>EAS114_26:6:140:253:322
+GAAATGCTCAAAAGAATTGTAAAAGTCAAAATTAA
+>EAS114_26:6:183:697:555
+AAAGAATGCCAGGAAGATACATTGCCAGACAGACT
+>EAS114_26:6:183:697:555
+AGAAATCTTAGAAGCCAGAAGAGATTGGATCTAAT
+>EAS114_26:6:46:13:880
+AAAACCTCTTTAGTCTTGCTAGAGATTTAGACATC
+>EAS114_26:6:46:13:880
+AGAAAAGCATACAGTCATCTATAAAGGAAATCCCA
+>EAS114_26:7:13:172:720
+AATTCATTTAAGAAATTACAAAATATAGTTGAAAG
+>EAS114_26:7:157:876:302
+AAAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGA
+>EAS114_26:7:157:876:302
+CAACTAGGTAAAAAATTAACATTACAACACGAACA
+>EAS114_26:7:218:858:445
+AAAAAGAAAAAGTGAGAAGTTTGGAAAAACTATTT
+>EAS114_26:7:218:858:445
+GAAAAAAATTCTAAAATCAGCAAGAGAAAAGCATA
+>EAS114_26:7:245:323:744
+ACCTCTACATGGCTGATTATGAAAACAATGTTCCC
+>EAS114_26:7:245:323:744
+GATTCTGGGAAATTCTTCATCCTGGACCCTGAGAG
+>EAS114_26:7:37:79:581
+TTAAAATTTAAAAAAAGTAAATAAAACACATAGCT
+>EAS114_26:7:37:79:581
+TTTTTTTTTTTTTTTTTTTTTTTCATGCCAGAAAA
+>EAS114_26:7:86:308:648
+GAAGAACAGCTTAGGTATCAATTTGGTGTTCTGTG
+>EAS114_26:7:86:308:648
+TATTAGGAAATGCTTTACTGTCATAACTATGAAGA
+>EAS114_28:1:144:242:602
+ATCAGCAAGAGAAAAGCATACAGTCATCTATAAAGG
+>EAS114_28:1:144:242:602
+ATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAAG
+>EAS114_28:1:168:389:889
+GAAATTACAAAATATAGTTGAAAGCTCTAACAATAG
+>EAS114_28:1:168:389:889
+TAACTGAACCTATGAGTCACAGGTATTCCTGAGGAA
+>EAS114_28:1:168:609:646
+GGAAAAAAGTAAACTCTCAAATATTGCTAGTGGGAG
+>EAS114_28:1:168:609:646
+GGTTACACTAAAAGCCCATACTTTACTGCTACTCAA
+>EAS114_28:1:220:801:282
+AACCTCATATATCAATATTAACTTTGAATAAAAAGG
+>EAS114_28:1:220:801:282
+AATTCATCATCACTAAACCAGTCCTATAAGAAATGC
+>EAS114_28:1:232:351:909
+ACATGGCTGATTATGAAATCAATGTTCCCCAGATGC
+>EAS114_28:1:232:351:909
+CCATCATGAAGCGCTGAACTTCCACGTCTCATCTAG
+>EAS114_28:1:28:708:463
+CCCAATGGACCTGTGATATCTGGATTCTGGGAAATT
+>EAS114_28:1:28:708:463
+GTATAATACCTCTACATGGCTGATTATGAAAACAAT
+>EAS114_28:2:114:938:216
+CTAACAATAGACTAAACCAAGCAGAAGAAAGAGGTT
+>EAS114_28:2:114:938:216
+GAGTCACAGGTATTCCTGAGGAAAAAGAAAAAGTGA
+>EAS114_28:2:141:7:963
+CACTTTAAATCAACAACAGTAAAATAAAACAAAGGA
+>EAS114_28:2:141:7:963
+TACAATGTACAATATTCTGATGATGGTTACACTAAA
+>EAS114_28:2:149:650:44
+CGCTCCACGCCCAAGCCCTTCTCACAGTTTCTGCCC
+>EAS114_28:2:149:650:44
+CTGTCTTACTTCCAGCTCCCCAGAGGGAAAGCTTTC
+>EAS114_28:2:167:905:852
+AGAAAAGCATACAGTCATCTATAAAGAAAATCCCAT
+>EAS114_28:2:167:905:852
+CAAATGCTAAGATAATTCATCATCACTAAACCAGTC
+>EAS114_28:2:251:819:772
+TCCCCCACTTAAGAGATATAGATTGGCAGAACAGAT
+>EAS114_28:2:251:819:772
+TTCAATACTCACCATCATAAATACACACAAAAGTAC
+>EAS114_28:2:28:474:566
+ACTGAACTTCCACGTCTCATCTAGGGGAACAGGGAG
+>EAS114_28:2:28:474:566
+TGAACCACACATTAATACTATGTTTCTTATCTGCAC
+>EAS114_28:2:329:437:643
+AAGATACATTGCAAGACAGACTTCATCAAGATATGT
+>EAS114_28:2:329:437:643
+TTTTGGACTTCTTAAAGAAAAAAAAACCTGTCAAAC
+>EAS114_28:2:55:562:403
+CAACAGGAACAAAACCTCATATATCAATATTAACTT
+>EAS114_28:2:55:562:403
+CAAGCAAATGCTAAGATAATTCATCATCACTAAACC
+>EAS114_28:3:110:984:98
+AAGGAGGTCATCATACAATGATAAAAAGATCAATTC
+>EAS114_28:3:110:984:98
+ACTAAAACCCCATACTTTACTGCTACTCAATATATC
+>EAS114_28:3:173:627:465
+GTAAACTCTCAAATATTGCTAGTGGGAGTATAAATT
+>EAS114_28:3:173:627:465
+TGATGGTTACACTAAAAGCCCATACTTTACTGCTAC
+>EAS114_28:3:176:402:458
+AAATAGCTAAAACTAAAAAAGCAAAAACAAAAACTA
+>EAS114_28:3:176:402:458
+CCTAAGAGGGATGAGAAATTACCTAATTGGTACAAT
+>EAS114_28:3:202:275:776
+CAAATACTACTAGACCTAAGAGGGATGAGAAATTAC
+>EAS114_28:3:202:275:776
+TCTACGCAAACAGAAACCAAATGAGAGAAGGAGTAG
+>EAS114_28:3:250:628:423
+CAATGGGCTTCTCAGCGGAAACCTTACAAGCCAGAA
+>EAS114_28:3:250:628:423
+CTCTTTAGTCTTGCTAGAGATTTAGACATCTAAATG
+>EAS114_28:3:279:763:945
+CTGCACATTACTACCCTGCAATTAATATAATTGTGT
+>EAS114_28:3:279:763:945
+GCCGTCCGTGTCCTCCCATCTGGCCTCGTCCACACT
+>EAS114_28:3:308:509:948
+AATATATCCATGTAACAAATCTGCGCTTGTACTTCT
+>EAS114_28:3:308:509:948
+AATTCAGCAAGAAGATATAACCATCCTACTAAATAC
+>EAS114_28:3:32:492:907
+CAAACACGAATGTTATGCCCTGCTAAACTAAGCATC
+>EAS114_28:3:32:492:907
+TGTAGTCATCAGACTATCTAAAGTCAACATGAAGGA
+>EAS114_28:3:78:773:660
+ATTCTGCAGCCCAGCTCCAGATTGCTTGTGGTCTGA
+>EAS114_28:3:78:773:660
+CCCCAGATACCATCCCTGTCTTACTTCCAGCTCCCC
+>EAS114_28:4:13:701:55
+AAACCAAATGAGAGAAGGAGTAGCTATACTTATATC
+>EAS114_28:4:13:701:55
+TTCATAAAACAAATACTACTAGACCTAAGAGGGATG
+>EAS114_28:4:149:572:877
+ATGTAAAGTAACTGAACCTATGAGTCACAGGTATTC
+>EAS114_28:4:149:572:877
+GAGGCTCAAAGAATGCCAGGAAGATACATTGCAAGA
+>EAS114_28:4:215:246:640
+AAAAGCATACAGTCATCTATAAAGGAAATCCCATCA
+>EAS114_28:4:215:246:640
+AAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGAT
+>EAS114_28:4:305:707:258
+AAAAAGATGTTCTACGCAAGCAGAAACCAAATGAGA
+>EAS114_28:4:305:707:258
+GAACAAAACCTCATATATCAATATTAACTTTGAATA
+>EAS114_28:4:322:631:245
+CCCTGCCCCATCTCTTGTAATCTCTCTCCTTTTTGC
+>EAS114_28:4:322:631:245
+TATTTTTGTCTTGACACCCAACTAATATTTGTCTGA
+>EAS114_28:4:9:55:730
+ATCCCTGTCTTACTTCCAGCTCCCCAGAGGGAAAGC
+>EAS114_28:4:9:55:730
+CAGCCCAGCTCCAGATTGCTTGTGGTCTGACAGGCT
+>EAS114_28:5:104:350:749
+AAATATAGAAATTGAAACAGCTGTGTTTAGTGCCTT
+>EAS114_28:5:104:350:749
+TGCCCTCTTCTTCCAAAGATGAAACGCGTAACTGCG
+>EAS114_28:5:11:868:62
+TCTTAGGGAAGAACAGCTTAGGTATCAATTTGGTGT
+>EAS114_28:5:11:868:62
+TTTACTGTCATAACTATGAAGAGACTATTGCCAGAT
+>EAS114_28:5:163:832:715
+TAAAAACATGAACTAACTATATGCTGTTTACAAGAA
+>EAS114_28:5:163:832:715
+TAAAACAAAGGAGGTCATCATACAATGATAAAAAGA
+>EAS114_28:5:206:671:49
+ACCATCCCTGTCTTACTTCCAGCTCCCCAGAGGGAA
+>EAS114_28:5:206:671:49
+GCTTGTGGTCTGACAGGCTGCAACTGTGAGCCATCA
+>EAS114_28:5:209:778:588
+AACAGTCTAGATGAGAGAGAACTTCCCTGGAGGTCT
+>EAS114_28:5:209:778:588
+TTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTT
+>EAS114_28:5:23:944:377
+AATGTTATGCCCTGCTAAACTAAGCATCATAAATGA
+>EAS114_28:5:23:944:377
+AGTACAAAACTCACAGGTTTTATAAAACAATTAATT
+>EAS114_28:6:11:151:750
+GTTTTTATTTTTTTCCTCTCTCTTTTTTTTTTTTTT
+>EAS114_28:6:155:68:326
+CCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGTAA
+>EAS114_28:6:155:68:326
+GTCAGGGAAGGAGCATTTTGTCAGTTACCAAATGTG
+>EAS114_28:6:175:705:982
+CATGGTTTAGGGGTATAATACCTCTACATGGCTGAT
+>EAS114_28:6:175:705:982
+CTGGATTCTGGGAAATTCTTCATCCTGGACCCTGAG
+>EAS114_28:6:185:87:475
+AAGAAACTCATTAATAAAGACATGAGTTCAGGTAAA
+>EAS114_28:6:185:87:475
+ATTGAGACTACAGAGCAACTAGGTAAAAAATTAACA
+>EAS114_28:6:187:996:432
+TGCTCAAAAGAATTGTAAAAGTCAAAATTAAAGTTC
+>EAS114_28:6:187:996:432
+TTGAATAAAAAGGGATTAAATTCCCCCACTTAAGAG
+>EAS114_28:6:51:506:878
+TAATCTCTCTCCTTTTTGCTGCATCCCTGTCTTCCT
+>EAS114_28:6:51:506:878
+TTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAACT
+>EAS114_28:6:54:263:585
+TGAAAGCTTGGGCTGTAATGATGCCCCTTGGCCATC
+>EAS114_28:6:54:263:585
+TGTCCATGTACACACGCTGTCCTATGTACTTATCAT
+>EAS114_28:7:133:514:754
+AGCTATACTTATATCAGATAAAGCACACTTTAAATC
+>EAS114_28:7:133:514:754
+TAAATTTGAATAAAAAGGGATTAAATTCCCCCACTT
+>EAS114_28:7:157:786:424
+GCTTTACTGTCATAACTATGAAGAGACTATTGCCAG
+>EAS114_28:7:157:786:424
+TTAGGTATCAATTTGGTGTTCTGTGTAAAGTCTCAG
+>EAS114_28:7:178:276:693
+GTTCAGAACTTGAAGACAAGTCTCTTATGAATTAAC
+>EAS114_28:7:178:276:693
+TTGGAAAAACTATTTGAGGAAGTAATTGGGGAAAAC
+>EAS114_28:7:215:863:521
+ACTCACCATCATAAATACACACAAAAGTACAAAACT
+>EAS114_28:7:215:863:521
+TAAGAGATATAGATTGGCAGAACAGATTTAAAAACA
+>EAS114_28:7:242:354:637
+AACTATATTTATGCTATTCAGTTCTAAATATAGAAA
+>EAS114_28:7:242:354:637
+CCCTCTTCTTCCAAAGATGAAACGCGTAACTGCGCT
+>EAS114_28:7:287:492:169
+CGCTCTCATTCACTCCAGCTCCCTGTCACCCAATGG
+>EAS114_28:7:287:492:169
+GTGTTTAGTGCCTTTGTTCAACCCCCTTGCAACAAC
+>EAS114_28:7:57:324:546
+GTCATCTATAAAGGAAATCCCATCAGAATAACAATG
+>EAS114_28:7:57:324:546
+TAATTGGGGAAAACCTCTTTAGTCTTGCTAGAGATT
+>EAS114_30:1:134:379:893
+AGTTTCTGCCCCCAGCATGGTTGTACTGGGCAATA
+>EAS114_30:1:134:379:893
+CGCTGTCCTATGTACTTATCATGACTCTATCCCAA
+>EAS114_30:1:154:818:165
+GAACAGGGAGGTGCACTAATGCGCTCCACGCCCAA
+>EAS114_30:1:154:818:165
+TTCTTATCTGCACATTACTACCCTGCAATTAATAT
+>EAS114_30:1:176:168:513
+ATTTGTAATGAAAACTATATTTATGCTATTCAGTT
+>EAS114_30:1:176:168:513
+TTTGATGCCCTCTTCTTCCAAAGATGAAACGCGTA
+>EAS114_30:1:188:863:790
+CATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTC
+>EAS114_30:1:188:863:790
+TTAATTGAGACTACAGAGCAACTAGGTAAAAAATT
+>EAS114_30:1:243:10:911
+TATTCAGTTCTAAATATAGAAATTGAAACAGCTGT
+>EAS114_30:1:64:526:339
+ACATTACAACAGGAACAAAACCTCATATATCAATA
+>EAS114_30:1:64:526:339
+CAAATGAGAGAAGGAGTATCTATACTTATATCAGA
+>EAS114_30:2:111:142:21
+ATCAGAATAACAATGGGCTTCACAGCGGAAACCTT
+>EAS114_30:2:111:142:21
+CTTGCTAGAGATTTAGACATCTAAATGAAAGAGGC
+>EAS114_30:2:226:885:729
+AAAAGGGATTAAATTCCCCCACTTAAGAGATATAG
+>EAS114_30:2:226:885:729
+GCTGAACTTACATCAGATAAAGCACACTTTAAATC
+>EAS114_30:2:272:750:698
+GTGTTTATTACCAGAGGGATGGAGGGATGACGGAC
+>EAS114_30:2:272:750:698
+TGCAGCCCAGATCCAGATTGCTTGTGGTCTGACAG
+>EAS114_30:2:297:949:26
+ACCAGTCCTATAAGAAATGCTCAAAAGAATTGTAA
+>EAS114_30:2:297:949:26
+CATATATCAATATTAACTTTGAATAAAAAGGGATT
+>EAS114_30:2:303:428:326
+AAAATTAAAATTTAACAAAAGTAAATAAAACACAT
+>EAS114_30:2:303:428:326
+TTTTTTTTTTTTTTTTTCTCTTTTTTTTTTTTTTT
+>EAS114_30:2:30:887:404
+CAACAGGAAGAAAAGGTCTTTCAAAAGGTGATGTG
+>EAS114_30:2:30:887:404
+TTGCCTTCAGACCCTGCACGAATGCGTCTCTACCA
+>EAS114_30:2:315:412:921
+GTAAAGATGTGGGGAAAAAAGTAAACTCTCAAATA
+>EAS114_30:2:315:412:921
+TTCTGATGATGGTTACACTACAAGCCCATACTGTA
+>EAS114_30:2:82:963:128
+ATTAAAGTTCAATACTCACCATCATAAATACACAC
+>EAS114_30:2:82:963:128
+GGCAGAACAGATTTAAAAACATGAACTAACTATAT
+>EAS114_30:3:139:117:262
+AGGGGCTTAACCTCTGGTGACTGCCAGAGCTGCTG
+>EAS114_30:3:139:117:262
+GTTCTCAAGGTTGTTGCAAGGGGGTCTATGTGAAC
+>EAS114_30:3:14:697:541
+TAAAAGCAGCAAGAGAAAAGCATACAGTCATCTAT
+>EAS114_30:3:14:697:541
+TTGGAAAAACTATTTGAGGAAGTAATTGGGGAAAA
+>EAS114_30:3:161:366:544
+CTTCTGTAATTGAAAAATTCATTTAAGAAATTACA
+>EAS114_30:3:181:582:435
+CAAGTCTTTCCTGACAAGCAAATGCTAAGATAATT
+>EAS114_30:3:181:582:435
+GAAATCCCATCAGAATAACAATGGGCTTCTCAGCA
+>EAS114_30:3:187:294:947
+ACAGGGGGCTGCGCGGTTTCCCATCATGAAGCACT
+>EAS114_30:3:187:294:947
+AGAGACTATTGCCAGATGAACCACACATTAATACT
+>EAS114_30:3:215:840:760
+AAGTATTGGTAAAGATGTGGGGAAAAAAGTAAACT
+>EAS114_30:3:215:840:760
+CTGATGATGGTTACACTAAAAGCCCATACTTTCCT
+>EAS114_30:3:24:195:604
+TCACAGTTTCTGCCCCCAGCATGGTTGTACTGTGC
+>EAS114_30:3:24:195:604
+TGTCCTATGTACTTATCATGACTCTATCCCAAATT
+>EAS114_30:3:302:288:657
+AGGTATCAATTTGGTGTTCTGTGTAAAGTCTCAGG
+>EAS114_30:3:302:288:657
+CCAGATGAACCACACATTAATACTATGTTTCTTAT
+>EAS114_30:3:35:361:546
+TGCACTAATGCGCTCCACGCCCAAGCCCTTCTCAC
+>EAS114_30:3:35:361:546
+TTACTTCCAGCTCCCCAGAGGGAAAGCTTTCAACG
+>EAS114_30:3:39:348:594
+AAGCATCATAAATGAAGGGGAAATAAAGTCAAGTC
+>EAS114_30:3:39:348:594
+CTAAAATCAGCAAGAGAAAAGCATACAGTCATCTA
+>EAS114_30:4:183:852:253
+ACAACAGGAAGAAAAGGTCTTTCAAAAGGTGATGT
+>EAS114_30:4:183:852:253
+CCATTTCTTTTGGCATTTGCCTTCAGACCCTACAC
+>EAS114_30:4:317:378:535
+AGCTTGGGCTGTAATGATGCCCCTTGGCCATCACC
+>EAS114_30:4:317:378:535
+GCTGTCCTATGTACTTATCATGACTCTATCCCAAA
+>EAS114_30:4:327:795:103
+AACCTTGAGAACCCCAGGGAATTTGTCAATGTCAG
+>EAS114_30:4:327:795:103
+ACAGGGGCTTAACCTCTGGTGACTGCCAGAGCTGC
+>EAS114_30:4:328:537:640
+AAGTATGAGATTATGTAAAGTAACTGAACCTATGA
+>EAS114_30:4:328:537:640
+GGAAGATACATTGCAAGACAGACTTCATCAAGATA
+>EAS114_30:5:327:991:508
+ACCCCAGGGAATTTGTCAATGTCAGGGAAGGAGCA
+>EAS114_30:5:327:991:508
+TCTGGATTCTGGGAAATTCTTCATCCTGGACCCTG
+>EAS114_30:5:32:461:154
+ACAGGTTTTATAAAACAATTAATTGAGACTACAGA
+>EAS114_30:5:32:461:154
+TTACAAGAAACTCATTAATAAAGACATGAGTTCAG
+>EAS114_30:6:137:741:866
+ACACTAAAAGCCCATACTTTACTGCTACTCAATAT
+>EAS114_30:6:137:741:866
+GATGAGGGGAAAAAAGTAAACTCTCAAATATTGCT
+>EAS114_30:6:157:42:763
+TCTGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCG
+>EAS114_30:6:157:42:763
+TTTCTTATCTGCACATTACTACCCTGCAATTATTA
+>EAS114_30:6:163:312:891
+CCCAGCTCCAGATTGCTTGTGGTCTGACAGGCTGC
+>EAS114_30:6:163:312:891
+TTCCCCAGATACCGTCCCTGTCTTACTTCCAGCTC
+>EAS114_30:6:214:565:337
+AAATGAAAGAGGCTCAAAGAATGCCAGGAAGATAC
+>EAS114_30:6:214:565:337
+CAGAATAACAATGGGCTTCTCAGCAGAAACCTTAC
+>EAS114_30:6:220:809:850
+GGGGGGAAAAAGATGTGCTACACAAAAAGATTCCA
+>EAS114_30:6:220:809:850
+TTCATAAAACAAATACTACTAGACCTAAGAGGGAT
+>EAS114_30:6:238:803:383
+ACAATTAATTGAGACTACAGAGCAACTAGGTAAAA
+>EAS114_30:6:238:803:383
+ACTCATTAATAAAGACATGAGTTCAGGTAAAGGGG
+>EAS114_30:6:243:209:110
+AAAACATGAACTAACTATATGCTGTTTACAAGAAA
+>EAS114_30:6:243:209:110
+CACAGGTTTTATAAAACAATTAATTGAGACTACAG
+>EAS114_30:6:277:397:932
+TTTCTTTTCACTTTTTTTTTTTTTTTTTTTTACTT
+>EAS114_30:6:290:146:36
+CTTTCCCATCCCCCGGTCCCTGCCCCATCTCTTGT
+>EAS114_30:6:290:146:36
+TTATCATGACTCTATCCCAAATTCCCAATTACGTC
+>EAS114_30:6:326:309:149
+CAACCTTGAGAACCCCAGGGAATTTGTCAATGTCA
+>EAS114_30:6:326:309:149
+CTCCCTGTCACCCAATGGACCTGTGATATCTGGAT
+>EAS114_30:6:41:461:436
+TAAAGTAACTGAACCTATGAGTCACAGGTATTCCT
+>EAS114_30:6:41:461:436
+TAGTCATCAGACTATCTAAAGTCAACATGAAGGAA
+>EAS114_30:6:49:656:507
+AAGGAAATCCCATCAGAATAACAATGGGCTTCTCA
+>EAS114_30:6:49:656:507
+TCCTGACAAGCAAATGCTAAGATAATTCATCATCA
+>EAS114_30:6:4:665:771
+GAATAAAAAGGGATTAAATTCCCCCACTTAAGAGA
+>EAS114_30:6:4:665:771
+GTGCTTTATCTGATATCAATGCCGATAAACTGCCT
+>EAS114_30:6:62:386:959
+AAAAGCTTTCAACGCTTCTAGCCATTTCTTTTGGC
+>EAS114_30:6:62:386:959
+AATGAACAACAGGAAGAAAAGGTCTTTCAAAAGGT
+>EAS114_30:7:269:944:220
+ATCTTCTTCTTAGGGAAGAACAGCTTAGGTATCAA
+>EAS114_30:7:269:944:220
+TGGGCAATACATGAGATTATTAGGAAATGCTTTAC
+>EAS114_30:7:283:799:560
+ACATAGACCCCCTTGCAACAACCTTGAGAACCCCA
+>EAS114_30:7:283:799:560
+GGGGTTGCCAGCACAGGGGCTTAACCTCTGGTGAC
+>EAS114_30:7:310:155:312
+CAGCAAGAGAAAAGCATACAGTCATCTATAAAGGA
+>EAS114_30:7:310:155:312
+CATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCT
+>EAS114_30:7:319:11:255
+TACCCAGATTCATAAAACAAATACTACTAGACCTA
+>EAS114_30:7:319:11:255
+TCTATAAAAAAATTAAAATTTAACAAAAGTAAATA
+>EAS114_30:7:59:871:351
+GTAAAAAATTAACATTACAACAGGAACAAAACCTC
+>EAS114_30:7:59:871:351
+TAAAGACATGAGTTCAGGTAAAGGGGTGGAAAAAG
+>EAS114_30:7:71:311:202
+TATGAGATTATGTAAAGTAACTGAACCTATGAGTC
+>EAS114_30:7:71:311:202
+TCAAGATATGTAGTCATCAGACTATCTAAAGTCAA
+>EAS114_32:1:199:760:42
+ACCCAATTAATATTTTTCTTAGCAAAACAGTCTAG
+>EAS114_32:1:199:760:42
+CTCTCTAATTTTTGCTGCTTCCATGTCTTACTCTG
+>EAS114_32:1:208:971:600
+AAAAAAACCTGTCAAACACGAATGTTATGCCCTGC
+>EAS114_32:1:208:971:600
+AGATATGTAGTCATCAGACTATCTAAAGTCAACAT
+>EAS114_32:2:163:618:570
+AGGCTGCAACTGTGAGCCATCACAATGAACAACAG
+>EAS114_32:2:163:618:570
+GGAAAGCTGTCAACGCTTCTAGCCATTTCTTTTGG
+>EAS114_32:2:197:170:559
+CTCATTCACTCCAGCTCCCTGTCACCCAATGGACC
+>EAS114_32:2:197:170:559
+TTCTCAAGGTTGTTGCAAGGGGGTCTATGTGAACA
+>EAS114_32:2:247:900:123
+AAAACATGAACTAACTATATGCTGTTTACAAGAAA
+>EAS114_32:2:247:900:123
+AATTCAGCAAGAAGATATAACCATCCTACTAAATA
+>EAS114_32:2:283:577:398
+ACAATGGGCTTCTCAGCGGAAACCTTACAAGCCAG
+>EAS114_32:2:283:577:398
+CTAGAGATTTAGACATCTAAATGAAAGAGGCTCAA
+>EAS114_32:2:306:119:56
+CTTCTCACAGTTTCTGCCCCCAGCATGGTTGTACT
+>EAS114_32:2:306:119:56
+TCCATGTACACACGCTGTCCTATGTACTTATCATG
+>EAS114_32:3:236:475:254
+AGATAAAGCACACTTTAAATCAACAACAGTAAAAT
+>EAS114_32:3:236:475:254
+TTCCCCCACTTAAGAGATATAGATTGGCAGAACAG
+>EAS114_32:3:307:113:346
+AATTCAGCAAGAAGATATAACCATCCTACTAAATA
+>EAS114_32:3:307:113:346
+ATGCTGTTTACAAGAAACTCATTAATAAAGACATG
+>EAS114_32:4:156:21:69
+AAAAATGAACAGAGCTTTCAAGAAGTATGAGATTA
+>EAS114_32:4:156:21:69
+TTGCAAGACAGACTTCATCAAGATATGTAGTCATC
+>EAS114_32:4:20:41:138
+CATTTCTTTTGGCATTTGCCTTCAGACCCTACACG
+>EAS114_32:4:20:41:138
+GTCTTTCAAAAGGTGATGTGTGTTCTCATCAACCT
+>EAS114_32:4:228:587:504
+GCACATTACGACCCGGCAAGGTGTATAATTGTGTC
+>EAS114_32:4:228:587:504
+GTGCACTAATGCGCTCCACGCCCAAGCCCTTCTCA
+>EAS114_32:4:246:647:765
+GATCAATTCAGCAAGAAGATATAACCATCCTACTA
+>EAS114_32:4:246:647:765
+TATGCTGTTTACAAGAAACTCATTAATAAAGACAT
+>EAS114_32:4:42:923:169
+ACAGTTTCTGCCCCCAGCATGGTTGTACTGGGCAA
+>EAS114_32:4:42:923:169
+TTCCTATGTACTTATCATGAATCTATCCCAAATTC
+>EAS114_32:4:5:396:292
+ATACATATGCACCTAACACAAGACTACCCAGATTC
+>EAS114_32:4:5:396:292
+TAAAGGGGTGGAAAAAGATGTTCTACGCAAACAGA
+>EAS114_32:4:7:282:424
+CAAAAACAAAAACTATGCTAAGTATTGTTAAAGAT
+>EAS114_32:4:7:282:424
+TAATTGGTACAATGTACAATATTCTGATGATGGTT
+>EAS114_32:5:109:199:592
+ACGAATATTATGCCCTGCTAAACTAAGCATCATAA
+>EAS114_32:5:109:199:592
+AGTCATCAGACTATCTAAAGTCAACATGAAGGAAA
+>EAS114_32:5:182:313:319
+AATTTGGTGTTCTGTGTAAAGTCTCAGGGAGCCGT
+>EAS114_32:5:182:313:319
+GATGAACCACACATTAATACTATGTTTCTTATCTG
+>EAS114_32:5:267:170:250
+CATAAAACAAATACTACTAGACCTAAGAGGGATGA
+>EAS114_32:5:267:170:250
+CATAGCTAAAACTAAAAAAGCAAAAACAAAAACTA
+>EAS114_32:5:78:583:499
+TTTACGCTATTCAGTACTAAATATAGAAATTGAAA
+>EAS114_32:6:122:342:296
+AAAGCTTGGGCTGTAATGATGCCCCTTGGCCATCA
+>EAS114_32:6:122:342:296
+TCCTATGTACTTATCATGACTCTATCCCAAATTCC
+>EAS114_32:6:178:342:866
+AACAAATCTGCGCTTGTACTTCTAAATCTATAAAA
+>EAS114_32:6:178:342:866
+ATACATATGCACCTAACACAAGACTACCCAGATTC
+>EAS114_32:6:179:735:569
+ATGTTAAAATGTCTATTTTTGTCTTGACACCCAAC
+>EAS114_32:6:179:735:569
+CATCACCCGGTCCCTGCCCCATCTCTTGTAATCTC
+>EAS114_32:6:199:818:124
+AACTATTTGAGGAAGTAATTGGGGAAAACCTCTTT
+>EAS114_32:6:199:818:124
+ACTAAACCAAGCAGAAGAAAGAGGTTCAGAACTTG
+>EAS114_32:6:78:909:394
+ATTGCTTGGTGTCTGACAGGCTGCAACTGTGAGCC
+>EAS114_32:6:78:909:394
+TACCAGAGGGATGGAGGGAAGAGGGACGCTGAAGA
+>EAS114_32:6:88:162:587
+GGTTTTCTGTTTCTTTGTTTGATTTGGTGGAAGAC
+>EAS114_32:6:88:162:587
+TTCTGTGTAAAGTCTCAGGGAGCCGTCCGTGTCCT
+>EAS114_32:7:174:597:66
+TCTCTTGTAATCTCTCTCCTTTTTGCTGCATCCCT
+>EAS114_32:7:174:597:66
+TCTTCTTCTTAGGGAAGAACAGCTTAGGTATCAAT
+>EAS114_32:7:201:959:19
+CTCTACATGGCTGATTATTAAAACAATGTTCCCCA
+>EAS114_32:7:201:959:19
+TATCTGGATTCTGGGAAATTCTTCATCCTGGACCC
+>EAS114_32:7:256:407:470
+AACGCTTCTAGCCATTTCTTTTGGCATTTGCCTTC
+>EAS114_32:7:256:407:470
+CAGCATGGTTGTACTGGGCAATACATGAGATTATT
+>EAS114_39:1:12:884:219
+GAGCCATCACAATGAACAACAGGAAGAAAAGGTCT
+>EAS114_39:1:12:884:219
+GCTTTCAACGCTTCTAGCCATTTCTTTTGGCATTT
+>EAS114_39:1:28:350:895
+ATATAGTTGAAAGCTCTAACAATAGACTAAACCAA
+>EAS114_39:1:28:350:895
+TATGAGTCACAGGTATTCCTGAGGAAAAAGAAAAA
+>EAS114_39:1:43:1120:878
+ACTAATGCGCTCCACGCCCAAGCCCTTCTCACAGT
+>EAS114_39:1:43:1120:878
+TAATTGTGTCCATGTACACACGCTGTCCTATGTAC
+>EAS114_39:1:70:147:84
+ATAGACCCCCTTGCAACAACCTTGAGAACCCCAGG
+>EAS114_39:1:70:147:84
+CCAGCACAGGGGCTTAACCTCTGGTGACTGCCAGA
+>EAS114_39:1:71:636:533
+GCCTCGTCCACACTGGTTCTCTTGAAAGCTTGGGC
+>EAS114_39:1:71:636:533
+GTGGAAGACATAATCCCACGCTTCCTATGGAAAGG
+>EAS114_39:1:73:302:1574
+AAGGTTGTTGGGAGATTTTTAATGATTCCTCAATG
+>EAS114_39:1:73:302:1574
+CCTCGTCCACACTGGTTCTCTTGAAAGCTTGGGCT
+>EAS114_39:1:98:641:1040
+CAGAACAGATTTAAAAACATGAACTAACTATATGC
+>EAS114_39:1:98:641:1040
+TAAAACAAAGGAGGTCATCATACAATGATAAAAAG
+>EAS114_39:2:18:967:582
+AAGCCGTTCTATTTGTAATGAAAACTATATTTATG
+>EAS114_39:2:18:967:582
+ACGCTGAAGAACTTTGATGCCCTCTTCTTCCAAAG
+>EAS114_39:2:38:670:564
+CAGGTTTTATAAAACAATTAATTGAGACTACAGAG
+>EAS114_39:2:38:670:564
+CTAACTATATGCTGTTTACAAGAAACTCATTAATA
+>EAS114_39:2:41:576:1016
+CTCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTC
+>EAS114_39:2:41:576:1016
+TGTCTGAGCAAAACAGTCTAGATGAGAGAGAACTT
+>EAS114_39:2:57:1064:925
+TGACAAGCAAATGCTAAGATAATTCATCATCACTA
+>EAS114_39:2:5:1219:137
+AAAACTAAAAAAGCAAAAACAAAAACTATGCTAAG
+>EAS114_39:2:5:1219:137
+ACCTAAGAGGGATGAGAAATTACATAATTGGTACA
+>EAS114_39:3:11:1238:1728
+AGAGATTTAGACATCTAAATGAAAGAGGCTCAAAG
+>EAS114_39:3:11:1238:1728
+TCCCATCAGAATAACAATGGGCTTCTCAGCGGAAA
+>EAS114_39:3:55:464:146
+AAAAAGATCAATTCAGCAAGAAGATATAACCATCC
+>EAS114_39:3:55:464:146
+CTCAATATATCCATGTAACAAATCTGCGCTTGTAC
+>EAS114_39:3:6:1064:1805
+TAAAATTTAACAAAAGTAAATAAAACACATAGCTA
+>EAS114_39:3:6:1064:1805
+TTGGTAATTTCGTTTTTTTTTTTTTCTTTTCTCTT
+>EAS114_39:3:88:84:1558
+AGTAACTGAACCTATGAGTCACAGGTATTCCTGTG
+>EAS114_39:3:88:84:1558
+ATCAGACTATCTAAAGTCAACATGAAGGAAAAAAA
+>EAS114_39:4:10:1312:1558
+AACTAACTATATGCTGTTTACAAGAAACTCATTAA
+>EAS114_39:4:10:1312:1558
+AGGTTTTATAAAACAATTAATTGAGACTACAGAGC
+>EAS114_39:4:30:432:228
+ATCCTACTAAATACATATGCACCTAACACAAGACT
+>EAS114_39:4:30:432:228
+GACATGAGTTCAGGGAAAGGGGTGGAAAAAGATGT
+>EAS114_39:4:30:570:902
+AAAAACCTGTCAAACACGAATGTTATGCCCTGCTA
+>EAS114_39:4:30:570:902
+ATACTCACCATCATAAATACGCACAAAAGTACAAA
+>EAS114_39:4:43:1047:1626
+GATGAGAGAGAACTTCCCTGGAGGTCTGATGGCGT
+>EAS114_39:4:43:1047:1626
+GTTGGTTTTCTGTTTCTTTGTTTGATTTGGTGGAA
+>EAS114_39:4:58:271:612
+AGGAAAAAGAAAAAGTGAGAAGTTTGGAAAAACTA
+>EAS114_39:4:58:271:612
+ATGAAGGAAAAAAATTCTAAAATCAGCAAGAGCAA
+>EAS114_39:4:93:77:1338
+GCTGCTTACAAGAAGCGCATTAATAAAGACATGAG
+>EAS114_39:4:93:77:1338
+GTCATCATACAATGAAAAAAAGATCAATTCAGCAA
+>EAS114_39:5:17:1222:783
+AAGAAAGAGGTTCAGAACTTGAAGACAAGTCTCGT
+>EAS114_39:5:17:1222:783
+TGAGAAGTTTGGAAAAACTATTTGAGGAAGTAATT
+>EAS114_39:5:42:1223:1087
+CAGTCCCTGCCCCATCTCTTGTAATCTCTCTCCTT
+>EAS114_39:5:42:1223:1087
+TTGTCTTGACACCCAACTAATATTTGTCTGAGCAA
+>EAS114_39:5:50:972:1286
+AGAGGGATGGAGGGAAGAGGGACGCTGAAGAACTT
+>EAS114_39:5:50:972:1286
+TTGTGGTCTGACAGGCTGCAACTGTGAGCCATCAC
+>EAS114_39:5:61:1000:1534
+CTTGAAGACAAGTCTCTTATGAATTAACCCAGTCA
+>EAS114_39:5:61:1000:1534
+GGGGAAAACCTCTTTAGTCTTGCTAGAGATTTAGA
+>EAS114_39:5:93:312:331
+AACTCATTAATAAAGACATGAGTTCAGGTAAAGGG
+>EAS114_39:5:93:312:331
+ATCCTACTAAATACATATGCACCTAACACAAGACT
+>EAS114_39:6:13:1034:1144
+AAAGATGAAACGCGTAACTGCGCTCTCATTCACTC
+>EAS114_39:6:13:1034:1144
+AATTGAAACAGCTGTGTTTAGTGCCTTTGTTCACA
+>EAS114_39:6:34:380:815
+AAGAGGTTCAGAACTTGAAGACAAGTCTCTTATGA
+>EAS114_39:6:34:380:815
+ATTGGGGAAAACCTCTTTAGTCTTGCTAGAGATTT
+>EAS114_39:6:71:644:1792
+AAAAGTACAAAACTCACAGGTTTTATAAAACAATT
+>EAS114_39:6:71:644:1792
+CCTGCTAAACTAAGCATCATAAATGAAGGGGAAAT
+>EAS114_39:6:76:282:1668
+AACAAAAACTATGCTAAGTATTGGTAAAGATGTGG
+>EAS114_39:6:76:282:1668
+TACAATGTACAATATTCTGATGATGGTTACACTAA
+>EAS114_39:6:7:492:1088
+ACAGGTTTTATAAAACAATTAATTGAGACTACAGA
+>EAS114_39:6:7:492:1088
+TGAACTAACTATATGCTGTTTACAAGAAACTCATT
+>EAS114_39:6:85:1224:625
+GAACTCCCCTGGAGGTCTGATGGCGTTTCTCCCTC
+>EAS114_39:6:85:1224:625
+GCTGCATCCCTGTCTTCCTCTGTCTTGATTTCCTT
+>EAS114_39:6:94:1273:1462
+AAGATGTTCTACGCAAACAGAAACCAAATGAGAGA
+>EAS114_39:6:94:1273:1462
+CCTAACACAAGACTACCCAGATTCATAAAACAAAT
+>EAS114_39:7:100:708:1984
+AGATGAACCACACATTAATACTATGTTTCTTATCT
+>EAS114_39:7:100:708:1984
+TACCACAGGGGGCTGCGCGGTTTCCCATCATGAAG
+>EAS114_39:7:23:1126:1886
+ACACTAAAAGCCCATACTTTACTGCTACTCAATAT
+>EAS114_39:7:23:1126:1886
+GGAGGTCATCATACAATGATAAAAAGATCAATTCA
+>EAS114_39:7:32:562:1695
+GATGATGGTTACACTAAAAGCCCATACTTTACTGC
+>EAS114_39:7:32:562:1695
+TAAAACAAAGGAGGTCATCATACAATGATAAAAAG
+>EAS114_39:7:57:1114:2032
+TAACTATATGCTGTTTACAAGAAACTCATTAATAA
+>EAS114_39:7:57:1114:2032
+TATTACAATGATAAAAAGATCAATTCAGCAAGAAG
+>EAS114_39:7:90:406:631
+CATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTG
+>EAS114_39:7:90:406:631
+TGAGAGAGAACTTCCCTGGAGGTCTGATGGCGTTT
+>EAS114_45:1:100:979:1863
+ATTACAAAACTCACAGGTTTTATAAAACAATTAAT
+>EAS114_45:1:100:979:1863
+TTATGCCCTGCTAAACTAAGCATCATAAATGAAGG
+>EAS114_45:1:12:1296:358
+CTTGAAAGCTTGGTCTGTAATGATGCCCCTTGGCC
+>EAS114_45:1:12:1296:358
+GTCCATGTACACACGCTGTCCTATGTACTTATCAT
+>EAS114_45:1:2:1422:1820
+CACCTAACACAAGACTACCCAGATTCATAAAACAA
+>EAS114_45:1:2:1422:1820
+TACGCAAACAGAAACCAAATGAGAGAAGGAGTAGC
+>EAS114_45:1:30:1882:1210
+ATCATCACTAAACCAGTCCTATAAGAAATGCTCAA
+>EAS114_45:1:30:1882:1210
+GCAGAAACCTTACAAGCCAGAAGAGATTGGATCTA
+>EAS114_45:1:33:1407:94
+TAGGTATCAATTTGGTGTTCTGTGTAAAGTCTCAG
+>EAS114_45:1:33:1407:94
+TTACTTGTTGTTGGTTTTCTGTTTCTTTGTTTGAT
+>EAS114_45:1:77:1000:1780
+AGTAATTGGGGAAAACCTCTTTAGTCTTGCTAGAG
+>EAS114_45:1:77:1000:1780
+TGAATTAACCCAGTCAGACAAAAATAAAGAAAAAA
+>EAS114_45:1:84:275:1572
+AGGGACGCTGAAGAACTTTGATGCCCTCTTCTTCC
+>EAS114_45:1:84:275:1572
+TTATGCTATTCAGTTCTAAATATAGAAATTGAAAC
+>EAS114_45:1:95:1530:28
+AAGAGGCTCAAAGAATGCCAGGAAGATACATTGCA
+>EAS114_45:1:95:1530:28
+AATGGGCTTCTCAGCAGAAACCTTACAAGCCAGAA
+>EAS114_45:1:9:1289:215
+AGGGACGCTGAAGAACTTTGATGCCCTCTTCTTCC
+>EAS114_45:1:9:1289:215
+TATGCTATTCAGTTCTAAATATAGAAATTGAAACA
+>EAS114_45:2:13:1507:1146
+AAGTAATTGGGGAAAACCTCTTTAGTCTTGCTAGA
+>EAS114_45:2:13:1507:1146
+CAGAAGAAAGAGGTTCAGAACTTGAAGACAAGTCT
+>EAS114_45:2:15:1497:1530
+AATTACGTCCTATCTTCTTCTTAGGGAAGAACAGC
+>EAS114_45:2:15:1497:1530
+TAATCTCTCTCCTTTTTGCTGCATCCCTGTCTTCC
+>EAS114_45:2:1:1140:1206
+TCCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGT
+>EAS114_45:2:1:1140:1206
+TTTATTACCAGAGGGATGGAGGGAAGAGGGACGCT
+>EAS114_45:2:20:413:1334
+CCGTGTCCTCCCATCTGGCCTCGTCCACACTGGTT
+>EAS114_45:2:20:413:1334
+TTGGTTTTCTGTTTCTTTGTTTGATTTGGTGGAAG
+>EAS114_45:2:23:1754:796
+CTTCTAAATCTATAAAAAAATTAAAATTTAACAAA
+>EAS114_45:2:23:1754:796
+CTTTGGAAAACAATTTGGTAATTTCGTTTTTTTTT
+>EAS114_45:2:33:1445:1357
+TATGAATTAACCCAGTCAGACAAAAATAAAGAAAA
+>EAS114_45:2:33:1445:1357
+TTAGTCTTGCTAGAGATTTAGACATCTAAATGAAA
+>EAS114_45:2:41:199:388
+AGAAAAAGTGAGAAGTTTGGAAAAACTATTTGAGG
+>EAS114_45:2:41:199:388
+TCTAAAGTCAACATGAAGGAAAAAAATTCTAAAAT
+>EAS114_45:2:49:163:904
+GCTCTCATTCACTCCAGCTCCCTGTCACCCAATGG
+>EAS114_45:2:49:163:904
+TCTTTCAAAAGGTGATGTGTGTTCTCATCAACCTC
+>EAS114_45:2:54:1886:719
+CTGTTTACAAGAAACTCATTAATAAAGACATGAGT
+>EAS114_45:2:54:1886:719
+TTCAGCAAGAAGATATAACCATCCTACTAAATACA
+>EAS114_45:2:59:396:359
+GGGTATAATACCTCTACATGGCTGATTATGAAAAC
+>EAS114_45:2:59:396:359
+TCACCCAATGGACCTGTGATATCTGGATTCTGGGA
+>EAS114_45:2:76:1765:700
+AAAAAGGGATTAAATTCCCCCACTTAAGAGATATA
+>EAS114_45:2:76:1765:700
+GTCCTATAAGAAATGCTCAAAAGAATTGTAAAAGT
+>EAS114_45:2:79:554:354
+AAATAAAACAAAGGAGGTCATCATACAATGATAAA
+>EAS114_45:2:79:554:354
+CAATGTACAATATTCTGATGATGGTTACACTAAAA
+>EAS114_45:3:26:1867:162
+ATATAACCATCCTACTAAATACATATGCACCTAAC
+>EAS114_45:3:26:1867:162
+ATATATCCATGTAACAAATCTGCGCTTGTACTTCT
+>EAS114_45:3:27:1881:486
+AAGAAAAGGTCTTTCAAAAGGTGATGTGTGTTCTC
+>EAS114_45:3:27:1881:486
+CCAAAGATGAAACGCGTAACTGCGCTCTCATTCAC
+>EAS114_45:3:2:1200:1076
+CATTTGCCTTCAGACCCTACACGAATGCGTCTCTA
+>EAS114_45:3:2:1200:1076
+GATGTGTGTTCTCATCAACCTCATACACACACATG
+>EAS114_45:3:32:1379:738
+TTAAGAAATTACAAAATATAGTTGAAAGCTCTAAC
+>EAS114_45:3:35:896:1588
+CTAGACCTAAGAGGGATGAGAAATTACCTAATTGG
+>EAS114_45:3:35:896:1588
+GAGTAGCTATACTTATATCAGATAAAGCACACTTT
+>EAS114_45:3:39:208:644
+ATTGTAAAAGTCAAAATTAAAGTTCAATACTCACC
+>EAS114_45:3:39:208:644
+TTTGAATAAAAAGGGATTAAATTCCCCCACTTAAG
+>EAS114_45:3:3:1377:1663
+CTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCA
+>EAS114_45:3:3:1377:1663
+GGTGATGTGTGTTCTCATCAACCTCATACACACAC
+>EAS114_45:3:3:864:1888
+AATGTTATGCCCTGCTAAACTAAGCATCATAAATG
+>EAS114_45:3:3:864:1888
+CAACATGAAGGAAAAAAATTCTAAAATCAGCAAGA
+>EAS114_45:3:41:653:1568
+AACTATTTGAGGAAGTAATTGGGGAAAACCTCTTT
+>EAS114_45:3:41:653:1568
+GGTTCAGAACTTGAAGACAAGTCTCTTATGAATTA
+>EAS114_45:3:44:1578:1674
+CCTCTTCTTCCAAAGATGAAACGCGTAACTGCGCT
+>EAS114_45:3:44:1578:1674
+GCTGCAACTGTGAGCCATCACAATGAACAACAGGA
+>EAS114_45:3:75:217:337
+GACAGGCTGCAACTGTGAGCCATCACAATGAACAA
+>EAS114_45:3:75:217:337
+GAGGGAAGAGGGACGCTGAAGAACTTTGATGCCCT
+>EAS114_45:3:90:1403:1635
+TGTCTTGACACCCAACTAATATTTGTCTGAGCAAA
+>EAS114_45:3:90:1403:1635
+TTGTAATCTCTCTCCTTTTTGCTGCATCCCTGTCT
+>EAS114_45:4:48:310:473
+TGAATTAACCCAGTCAGACAAAAATAAAGAAAAAA
+>EAS114_45:4:48:310:473
+TTTAGTCTTGCTAGAGATTTAGACATCTAAATGAA
+>EAS114_45:4:73:1208:495
+AGATGAAACGCGTAACTGCGCTCTCATTCACTCCA
+>EAS114_45:4:73:1208:495
+TAAATATAGAAATTGAAACAGCTGTGTTTAGTGCC
+>EAS114_45:4:7:1347:375
+CTAAAAGCCCATACTTTACTGCTACTCAATATATC
+>EAS114_45:4:7:1347:375
+GGAAAAAAGTAAACTCTCAAATATTGCTAGTGGGA
+>EAS114_45:4:87:323:895
+ATCTGGATTCTGGGAAATTCTTCATCCTGGACCCT
+>EAS114_45:4:87:323:895
+GGTATAATACCTCTACATGGCTGATTATGAAAACA
+>EAS114_45:4:88:55:1187
+GTCACAGGTATTCCTGAGGAAAAAGAAAAAGTGAG
+>EAS114_45:4:88:55:1187
+GTCATCAGACTATCTAAAGTCAACATGAAGGAAAA
+>EAS114_45:5:56:1757:1319
+CATTAATAAAGACATGAGTTCAGGTAAAGGGGTGG
+>EAS114_45:5:56:1757:1319
+TTTATAAAACAATTAATTGAGACTACAGAGCAACT
+>EAS114_45:5:62:841:1994
+ATATCCATGTAACAAATCTGCGCTTGTACTTCTAA
+>EAS114_45:5:62:841:1994
+TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT
+>EAS114_45:5:66:959:1311
+CAGAGCTGCTGGCAAGCTAGAGGCCCATCTGGAGC
+>EAS114_45:5:66:959:1311
+GGGAAGGAGCATTTTGTCAGTTACCAAATGTGTTT
+>EAS114_45:5:82:843:1838
+CTAGGTAAAAAATTAACATTACAACAGGAACAAAA
+>EAS114_45:5:82:843:1838
+TGTTCTACGCAAACAGAAACCAAATGAGAGAAGGA
+>EAS114_45:5:85:401:1190
+TACACACACATGGTTTAGGGGTATAATACCTCTAC
+>EAS114_45:5:85:401:1190
+TCACTCCAGCTCCCTGTCACCCAATGGACCTGTGA
+>EAS114_45:5:91:89:666
+GAAAACCTCTTTAGTCTTGCTAGAGATTTAGACAT
+>EAS114_45:6:14:1211:1332
+AGGAAGATACATTGCAAGACAGACTTCATCAAGAT
+>EAS114_45:6:14:1211:1332
+TTTCAAGAAGTATGAGATTATGTAAAGTAACTGAA
+>EAS114_45:6:37:156:134
+AGAGAAAAGCATACAGTCATCTATAAAGGAAATCC
+>EAS114_45:6:37:156:134
+GGAAAAACTATTTGAGGAAGTAATTGGGGAAAACC
+>EAS114_45:6:39:956:676
+TAAAACAAATACTACTAGACCTAAGAGGGATGAGA
+>EAS114_45:6:39:956:676
+TGAGAGAAGGAGTAGCTATACTTATATCAGATAAA
+>EAS114_45:6:44:77:1255
+CTCATTAATAAAGACATGAGTTCAGGTAAAGGGGT
+>EAS114_45:6:44:77:1255
+TAAAAAGATCAATTCAGCAAGAAGATATAACCATC
+>EAS114_45:6:45:1769:1130
+ACCCAATGGACCTGTGATATCTGGATTCTGGGAAA
+>EAS114_45:6:45:1769:1130
+TGTTCTCATCAACCTCATACACACACATGGTTTAG
+>EAS114_45:6:47:1791:444
+AAGAGGGATGAGAAATTACCTAATTGGTACAATGT
+>EAS114_45:6:47:1791:444
+TACTTATATCAGATAAAGCACACTTTAAATCAACA
+>EAS114_45:6:59:1548:1096
+CCCTGCCCCATCTCTTGTAATCTCTCTCCTTTTTG
+>EAS114_45:6:59:1548:1096
+GTCCTATGTACTTATCATGACTCTATCCCAAATTC
+>EAS114_45:6:5:730:1436
+GAAGAACTTTGATGCCCTCTTCTTCCAAAGATGAA
+>EAS114_45:6:5:730:1436
+TATTCAGTTCTAAATATAGAAATTGAAACAGCTGT
+>EAS114_45:6:86:693:234
+AAGTAACTGAACCTATGAGTCACAGGTATTCCTGA
+>EAS114_45:6:86:693:234
+GTAGTCATCAGACTATCTAAAGTCAACATGAAGGA
+>EAS114_45:6:86:859:1779
+TTTTTTTCATTTCTCTTTTTTTTTTTTTTTTTTTT
+>EAS114_45:6:90:561:850
+ACAGGAACAAAACCTCATATATCAATATTAACTTT
+>EAS114_45:6:90:561:850
+TACGCAAACAGAAACCAAATGAGAGAAGGAGTAGC
+>EAS114_45:6:93:1475:542
+TGAAAGCTTGGGCTGTAATGATGCCCCTTGGCCAT
+>EAS114_45:6:93:1475:542
+TTATCATGACTCTATCCCAAATTCCCAATTACGTC
+>EAS114_45:7:14:1256:204
+AAATGTCTATTTTTGTCTTGACACCCAACTAATAT
+>EAS114_45:7:14:1256:204
+TCACCCAGTCCCTGCCCCATCTCTTGTAATCTCTC
+>EAS114_45:7:14:978:1296
+ATACAATGATAAAAAGATCAATTCAGCAAGAAGAT
+>EAS114_45:7:14:978:1296
+CAATATTCTGATGATGGTTACACTAAAAGCCCATA
+>EAS114_45:7:24:1374:211
+AGCCCTTCTCACAGTTTCTGCCCCCAGCATGGTTG
+>EAS114_45:7:24:1374:211
+TGCAATTAATATAATTGTGTCCATGTACACACGCT
+>EAS114_45:7:2:168:1878
+AAAAAACCTGGCAAACACGAATGTTATGACATGTN
+>EAS114_45:7:2:168:1878
+TAAATACACACAAAAGTAGAAAACGCACCAGTTTT
+>EAS114_45:7:33:1566:588
+ACAGCTTAGGCATCAATTTGGTGTTCTGTGTAAAG
+>EAS114_45:7:33:1566:588
+TACTGTCATAACTATGAAGAGCCTATTGCCAGATG
+>EAS114_45:7:35:538:1882
+TATGCACCTAACACAAGACTACCCAGATTCATAAA
+>EAS114_45:7:35:538:1882
+TCTATAACAAAATTAAAATTTAACAAAAGTAAATA
+>EAS114_45:7:37:763:1437
+AAAGATGTTCTACGCAAACAGAAACCAAATGAGAG
+>EAS114_45:7:37:763:1437
+TAAAACAAATACTACTAGACCTAAGAGGGATGAGA
+>EAS114_45:7:45:1339:1807
+GACATCTAAATGAAAGAGGCTCAAAGAATGCCAGG
+>EAS114_45:7:69:1130:832
+ATAGTTGAAAGCTCTAACAATAGACTAAACCAAGC
+>EAS114_45:7:69:1130:832
+TCCTGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAA
+>EAS114_45:7:6:758:988
+AAACAAAGGAGGTCATCATACAATGATAAAAAGAT
+>EAS114_45:7:6:758:988
+ATTCTGATGATGGTTACACTAAAAGCCCATACTTT
+>EAS114_45:7:88:451:1773
+ATAAATACACACAAAAGTACAAAACTCACAGGTTT
+>EAS114_45:7:88:451:1773
+ATTGGCAGAACAGATTTAAAAACATGAACTAACTA
+>EAS114_45:7:97:1584:777
+CCAGATGAACCACACATTAATACTATGTTTCTTAT
+>EAS114_45:7:97:1584:777
+GTCTCAGGGAGCCGTCCGTGTCCTCCCATCTGGCC
+>EAS114_45:7:9:512:826
+AACTTCCACGTCTCATCTAGGGGAACAGGGAGGTG
+>EAS114_45:7:9:512:826
+ACCACACATTAATACTATGTTTCTTATCTGCACAT
+>EAS139_11:1:35:631:594
+ATCATGACTCTATCCCAAATTCCCAATTACGTCCT
+>EAS139_11:1:35:631:594
+ATGATGCCCCTTGGCCATCACCCGGTCCCTGCCCC
+>EAS139_11:1:59:742:549
+ACAAGCAAATGCTAAGATAATTCATCATCACTAAA
+>EAS139_11:1:59:742:549
+TTAACATTACAACAGGAACAAAACCTCATATATCA
+>EAS139_11:1:81:1019:558
+ACAAAACTCACAGGTTTTATAAAACAATTAATTGA
+>EAS139_11:1:81:1019:558
+TGAACTAACTATATGCTGTTTACAAGAAACTCATT
+>EAS139_11:1:84:92:1246
+GAAAAAAGTAAACTCTCAAATATTGCTAGTGGGAG
+>EAS139_11:1:84:92:1246
+GTTACACTAAAAGCCCATACTTTACTGCTACTCAA
+>EAS139_11:2:31:628:1820
+AACCAGTCCTATAAGAAATGCTCAAAAGAATTGTA
+>EAS139_11:2:31:628:1820
+CAGGAACAAAACCTCATATATCAATATTAACTTTG
+>EAS139_11:2:42:333:516
+AGACAAGTCTCTTATGAATTAACCCAGTCAGACAA
+>EAS139_11:2:42:333:516
+TCTTTAGTCTTGCTAGAGATTTAGACATCTAAATG
+>EAS139_11:2:55:296:1457
+CACTTTAAATCAACAACAGTAAAATAAAACAAAGG
+>EAS139_11:2:55:296:1457
+CCCACTTAAGAGATATAGATTGGCAGAACAGATTT
+>EAS139_11:2:63:816:921
+AAACCTCTTTAGTCTTGCTAGAGATTTAGACATCT
+>EAS139_11:2:63:816:921
+TGAAGACAAGTCTCTTATGAATTAACCCAGTCAGA
+>EAS139_11:2:6:251:1557
+AAAACAATGTTCCCCAGATACCATCCCTGTCTTAC
+>EAS139_11:2:6:251:1557
+CCTGAGAGATTCTGCAGCCCAGCTCCAGATTGCTT
+>EAS139_11:2:71:83:58
+AAAGAAAAAAGAATTTTAAAAATGAACAGAGCTTT
+>EAS139_11:2:71:83:58
+CCAGGAAGATACATTGCAAGACAGACTTCATCAAG
+>EAS139_11:3:34:970:1374
+ATGTGTTTATTACCAGAGGGATGGAGGGAAGAGGG
+>EAS139_11:3:34:970:1374
+CTGGACCCTGAGAGATTCTGCAGCCCAGATCCAGA
+>EAS139_11:3:43:1229:1855
+ATGTACAATATTCTGATGATGGTTACACTAAAAGC
+>EAS139_11:3:43:1229:1855
+CAACAGTAAAATAAAACAAAGGAGGTCATCATACA
+>EAS139_11:3:65:556:1505
+CAATGAACAACAGGAAGAAAAGGTCTTTCAAAAGG
+>EAS139_11:3:65:556:1505
+TGCCTTCAGACCCTACACGAATGCGTCTCTACCAC
+>EAS139_11:3:81:12:1231
+AAAAAGCAAAAACAAAAACTATGCTAAGTATTGGT
+>EAS139_11:3:81:12:1231
+TTACCTAATTGGTACAATGTACAATATTCTGATGA
+>EAS139_11:4:26:137:1382
+AATGTTATGCCCTGCTAAACTAAGCATCATAAATG
+>EAS139_11:4:26:137:1382
+AGACTATCTAAAGTCAACATGAAGGAAAAAAATTC
+>EAS139_11:4:36:1184:994
+GCCAGAAGAGATTGGATCTAATTTTTGGACTTCTT
+>EAS139_11:4:36:1184:994
+TACATTGCAAGACAGACTTCATCAAGATATGTAGT
+>EAS139_11:4:36:1231:1381
+AAGAGATATAGATTGGCAGAACAGATTTAAAAACA
+>EAS139_11:4:36:1231:1381
+TAAAAGTCAAAATTAAAGTTCAATACTCACCATCA
+>EAS139_11:4:38:557:1441
+GATAAAAATAAAAAAGCAAAAACAAAAACTATGCT
+>EAS139_11:4:38:557:1441
+TAAGAGGGATGAGAAATTACCTAATTGGTACAATG
+>EAS139_11:4:50:30:15
+AGATTATGTAAAGTAACTTAACCTATGAGTCCAAG
+>EAS139_11:4:50:30:15
+TACATTGCAAGACAGTCGTCAGCAAGATATGTAGT
+>EAS139_11:4:63:527:1923
+GCTTTACTGTCATAACCATGAAGAGACTATTGCCA
+>EAS139_11:4:63:527:1923
+TACACGAATGCGTCTCTACCACAGGGGGCTGCGCG
+>EAS139_11:5:32:686:735
+AAGTTTGGAAAAACTATTTGAGGAAGTAATTGGGG
+>EAS139_11:5:32:686:735
+CAGAAGAAAGAGGTTCANANNNTGANGACAAGTCT
+>EAS139_11:5:41:314:1173
+AAGAAAAAAAAACCTGTCAAACACGAATGTTATGC
+>EAS139_11:5:41:314:1173
+AATTAAAGTTCAATACTCACCATCATAAATACACA
+>EAS139_11:5:52:1278:1478
+GCTTGTACTTCTAAATCTATAACAAAATTAAAATT
+>EAS139_11:5:52:1278:1478
+GTTTTTTTTTTTTTCTTTTCTCTTTTTTTTTTTTT
+>EAS139_11:5:61:38:1182
+AGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCGTC
+>EAS139_11:5:61:38:1182
+GTTTGATTTGGTGGAAGACATAATCCCACGCTTCC
+>EAS139_11:5:64:199:1288
+GTAAAATAAAACAAAGGAGGTCATCATACAATGAT
+>EAS139_11:5:64:199:1288
+TACAATGTACAATATTCTGATGATGGTTACACTAA
+>EAS139_11:5:78:775:555
+AATGCTCAAAAGAATTGTAAAAGTCAAAATTAAAG
+>EAS139_11:5:78:775:555
+TTTTTGGACTTCTTAAAGAAAAAAAAACCTGTCAA
+>EAS139_11:6:11:285:1567
+ACATGGCTGATTATGAAAACAATGTTCCCCAGATA
+>EAS139_11:6:11:285:1567
+CCCTGAGAGATTCTGCAGCCCAGATCCAGATTGCT
+>EAS139_11:6:11:360:1577
+GAAGAAAAGGTCTTTCAAAAGGTGATGTGTGTTCT
+>EAS139_11:6:11:360:1577
+TTTGGCATTTGCCTTCAGACCCTACACGAATGCGT
+>EAS139_11:6:13:682:680
+AGAATAACAATGGGCTTCTCAGCGGAAACCTTACA
+>EAS139_11:6:13:682:680
+ATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAA
+>EAS139_11:6:17:1179:393
+CTAATTGGTACAATGTACAATATTCTGATGATGGT
+>EAS139_11:6:17:1179:393
+TGCTAAGTATTGGTAAAGATGTGGGGAAAAAAGTA
+>EAS139_11:6:19:306:982
+ATTTTTGGACTTCTTAAAGAAAAAAAAACCTGTCA
+>EAS139_11:6:19:306:982
+GACAGACTTCATCAAGATATGTAGTCATCAGACTA
+>EAS139_11:6:75:946:1035
+AACCCCCTTGCAACAACCTTGAGAACCCCAGGGAA
+>EAS139_11:6:75:946:1035
+AATGGACCTGTGATATCTGGATTCTGGGAAATTCT
+>EAS139_11:6:82:164:1924
+GAGGGATGGAGGGAAGAGGGACGCTGAAGAACTTT
+>EAS139_11:6:82:164:1924
+GCCCAGCACCAGATTGCTTGTGGTCTGACAGGCTG
+>EAS139_11:6:89:1151:1878
+CCATCACAATGAACAACAGGAAGAAAAGGTCTTTC
+>EAS139_11:6:89:1151:1878
+CTTTCAACGATTCTAGCCATTTCTTTTGGCATTTG
+>EAS139_11:7:24:1345:1627
+AGATTGGCAGAACAGATTTAAAAACATGAACTAAC
+>EAS139_11:7:24:1345:1627
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+>EAS139_11:7:42:1091:1726
+CAGGGAAGGAGCATTTTGTCAGTTACCAAATGTGT
+>EAS139_11:7:42:1091:1726
+TCTGGGAAATTCTTCATCCTGGACCCTGAGAGATT
+>EAS139_11:7:46:695:738
+CAAAGATGAAACGCGTAACTGCGCTCTCATTCACT
+>EAS139_11:7:46:695:738
+TGAAACAGCTGAGTTTAGCGCCTGTGTTCACATAG
+>EAS139_11:7:50:1229:1313
+ACATAGCTAAAACTAAAAAAGCAAAAACAAAAACT
+>EAS139_11:7:50:1229:1313
+TTTTTTCTTTTTTTTTTTTTTTTTTTTGCATGCCA
+>EAS139_11:7:53:458:581
+CTCAATTAATTGTTTTATAAAACCTGTGAGTTTTG
+>EAS139_11:7:53:458:581
+TTATGCCCTGCTAAACTAAGCATCATAAATGAAGG
+>EAS139_11:7:60:163:1612
+AGCAAGAGAAAAGCATACAGTCATCTATAAAGGAA
+>EAS139_11:7:60:163:1612
+GGGAACTAAAGTCAAGTCTTTCCTGACAAGCAAAT
+>EAS139_11:7:74:213:877
+AGATGTTCTACGCAAACAGAAACCAAATGAGAGAA
+>EAS139_11:7:74:213:877
+TTAACATTACAACAGGAACAAAACCTCATATATCA
+>EAS139_11:7:92:367:1495
+ACCACAGGGGGCTGCGCGGTTTCCCATCATGAAGC
+>EAS139_11:7:92:367:1495
+CTGTCATAACTATGAAGAGACTATTGCCAGATGAA
+>EAS139_11:8:17:437:1378
+ACCAGTCCTATAAGAAATGCTCAAAAGAATTGTAA
+>EAS139_11:8:17:437:1378
+ATCAATATTAACTTTGAATAAAAAGGGATTAAATT
+>EAS139_11:8:26:1221:222
+AAACAGCTGTGTTTAGTGCCTTTGTTCAACCCCCT
+>EAS139_11:8:26:1221:222
+CTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCA
+>EAS139_11:8:38:842:395
+GTAACTGCGCTCTCATTCACTCCAGCTCCCTGTCA
+>EAS139_11:8:76:205:587
+GAATAACAATGGGCTTCTCAGCGGAAACCTTACGA
+>EAS139_11:8:76:205:587
+TGCTAGAGATTTAGACATCTAAATGAAAGAGGCTC
+>EAS139_11:8:82:566:1096
+CAAAAGGTGATGTGTGTTCTCATCAACCTCATACA
+>EAS139_11:8:82:566:1096
+CTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCA
+>EAS139_11:8:96:1314:1448
+CCTATGAGTCACAGGTATTCCTGAGGAAAAATAAA
+>EAS139_11:8:96:1314:1448
+GTAGTCATCAGACTATCTAAAGTCAACATGAAGGA
+>EAS139_19:1:14:420:712
+TGAGAGAGAACTTCCCTGGAGGTCTGATGGCGTTTCTCCC
+>EAS139_19:1:14:420:712
+TGTTGGTTTTCTGTTTCTTTGTTTGATTTTTTTGAAGACA
+>EAS139_19:1:1:1598:843
+TCAGCGGAAACCTTACAAGCCAGAAGAGATTGGATCTAAT
+>EAS139_19:1:1:1598:843
+TGCCAGGAAGATACATTGCAAGACAGACTTCATCAAGATA
+>EAS139_19:1:36:481:1079
+GTAAAAGTCAAAATTAAAGTTCAATACTCACCATCATAAA
+>EAS139_19:1:36:481:1079
+TCCCCCACTTAAGAGATATAGATTGGCAGAACAGATTTAA
+>EAS139_19:1:40:1596:1433
+CCATCACAATGAACAACAGGAAGAAAAGGTCTTTCAAAAG
+>EAS139_19:1:40:1596:1433
+GCTTTCAACGCTTCTAGCCATTTCTTTTGGCATTTGCCTT
+>EAS139_19:1:47:352:1492
+AGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCGCCCACTA
+>EAS139_19:1:47:352:1492
+TTTGTTTTGTATGGTGGAAGACATAATCCCACGCTTCCTA
+>EAS139_19:1:53:463:1132
+ATGATAAAAAGATCAATTCAGCAAGAAGATATAACCATCC
+>EAS139_19:1:53:463:1132
+ATTTAAAAACATGAACTAACTATATGCTGTTTACANGAAA
+>EAS139_19:1:58:726:1746
+AGATTGGCAGAACAGATTTAAAAACATGAACTAACTATAT
+>EAS139_19:1:58:726:1746
+CAATTTAAATCAACAACAGTAAAATAAAACAAAGGAGGTC
+>EAS139_19:1:82:946:392
+CAATATATCCATGTAACAAATCTGCGCTTGTACTTCAAAA
+>EAS139_19:1:82:946:392
+GAAAACAATTTGGTAATTTCGTTTTTTTTTTTTTCTTTTC
+>EAS139_19:1:85:1521:58
+AAATTAACATTACAACAGGAACAAAACCTCATATATCAAT
+>EAS139_19:1:85:1521:58
+CTGACAAGCAAATGCTAAGATAATTCATCATCACTAAACC
+>EAS139_19:1:87:1222:878
+TATAGGGCCTTTGTTCAAACCCCTTGCAACAACCTTGAGA
+>EAS139_19:1:87:1222:878
+TCAGCGCGTCACTCCGCTCTCATTCACCCCAGCTCCCTGT
+>EAS139_19:1:99:1632:76
+AAAGAAAAAAAACCCTGTCAAACACGAATGTTATGCCCTG
+>EAS139_19:1:99:1632:76
+TTGTAAAAGTCAAAATTAAAGTTCAATACTCACCATCATA
+>EAS139_19:2:12:1335:1372
+GAAGAACAGATTTAAAAACATGAACTAACTATATGCTGTT
+>EAS139_19:2:12:1335:1372
+TAAAGTTCAATACTCACCATCATAAATACACACAAAAGTA
+>EAS139_19:2:29:1822:1881
+AGAAAAAAGAATTTTAAAAATGAACAGAGCTTTCAAGAAG
+>EAS139_19:2:29:1822:1881
+ATGAAAGAGGCTCAAAGAATGCCAGGAAGATACATTGCAA
+>EAS139_19:2:2:1217:398
+CAATTAATTGAGACTACAGAGCAACTAGGTAAAAAATTAA
+>EAS139_19:2:2:1217:398
+TAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAAGC
+>EAS139_19:2:33:1193:664
+GCCCCATCTCTTGTAATCTCTCTCCTTTTTTCTGCATCCC
+>EAS139_19:2:33:1193:664
+TATTTTTGTCTTGACACCCAACTAATATTTGTCTGAGCAA
+>EAS139_19:2:57:1672:1890
+CCCCCCCCCCCCCCCCCAGCCACTGCGGCCCCCCCAGCCA
+>EAS139_19:2:57:1672:1890
+TATTCAGTTCTAAATATAGAAATTGAAACAGCTGTGTTTA
+>EAS139_19:2:82:154:1333
+TTAAAATTTAACAAAAGTAAATAAAACACACAGCTAAAAC
+>EAS139_19:2:82:154:1333
+TTTTTTTTTTTTTTTTTTTTTCTTTTTTTTTTTTTTTTTT
+>EAS139_19:3:10:349:1147
+GCTAGAGATTTAGACATCTAAATGAAAGAGGCTCAAAGAA
+>EAS139_19:3:24:1135:563
+CTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCAATGGA
+>EAS139_19:3:24:1135:563
+GCTGTGTTTAGTGCCTTTGTTCAACCCCCTTGCAACAACC
+>EAS139_19:3:4:1502:1911
+CTACACGAATGCGTCTCTACCACAGGGGGCTGCGCGGTTT
+>EAS139_19:3:4:1502:1911
+TACACACACATGGTTTAGGGGTATAATACCTCTACATGGC
+>EAS139_19:3:58:923:1915
+GCAAACAGAAACCAAATGAGAGAAGGAGTAGCTATACTTA
+>EAS139_19:3:58:923:1915
+TATCAATATTAACTTTGAATAAAAAGGGATTAAATTCCCC
+>EAS139_19:3:5:538:401
+AAGGTGATGTGTGTTCTCATCAACCTCATACACACACATG
+>EAS139_19:3:5:538:401
+TTTGCCTTCACACCCTACACGAATGCGTCTCTGCCACAGG
+>EAS139_19:3:66:718:481
+AACAACAGTAAAATAAAACAAAGGAGGTCATCATACAATG
+>EAS139_19:3:66:718:481
+AAGAGATATAGATTGGCAGAACAGATTTAAAAACATGAAC
+>EAS139_19:3:73:1158:535
+AATAAAGATATGTAGTCATCAGACTATCTAAAGTCAACAT
+>EAS139_19:3:73:1158:535
+CCTATGAGTCACAGGTATTCCTGAGGAAAAAGAAAAAGTG
+>EAS139_19:3:73:936:1509
+CTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTTATTT
+>EAS139_19:3:73:936:1509
+TTGTCTGAGCAAAACAGTCTAGATGAGAGAGAACTTCCCT
+>EAS139_19:3:75:732:442
+CGGTCCCTGCCCCATCTCTTGTAATCTCTCTCCTTTTTGC
+>EAS139_19:3:75:732:442
+CTATCCCAAATTCCCAATTACGTCCTATCTTCTTCTTAGG
+>EAS139_19:3:87:133:930
+CAGTCTCAGGGCGCCGTCCGTTTCCTCCCATCTGGCCTCG
+>EAS139_19:3:87:133:930
+TTATCTGCACATTACTACCCTGCAATTAATATAATTGTGT
+>EAS139_19:3:88:1656:896
+AGGGAAGAGGGATGCTGAAGAACTTTGATGCCCTCTTCTT
+>EAS139_19:3:88:1656:896
+TGCAGCCCAGATCCAGATTGCTTGTGGTCTGACAGGCTGC
+>EAS139_19:4:13:1155:631
+ACCCTGAGAGATTCTGCAGCCCAGCTCCAGATTGCTTGTG
+>EAS139_19:4:13:1155:631
+AGGGGTATAATACCTCTACATGGCTGATTATGAAAACAAT
+>EAS139_19:4:18:1335:1514
+ATTGGTACAATGTACAATATTCTGATGATGGTTACACTAA
+>EAS139_19:4:18:1335:1514
+CTTTAAATCAACAACAGTAAAATAAAACAAAGGAGGTCAT
+>EAS139_19:4:1:156:196
+AAAAGGGATTAAATTCCCCCACTTAAGAGATATAGATTGG
+>EAS139_19:4:1:156:196
+ACTTATATCAGATAAAGCACACTTTAAATCAACAACAGTA
+>EAS139_19:4:26:1312:1400
+ACTGAACCTATGAGTCACAGGTATTCCTGAGGAAAAAGAA
+>EAS139_19:4:26:1312:1400
+TATGTAGTCATCAGACTATCTAAAGTCAACATGAAGGAAA
+>EAS139_19:4:26:274:1078
+AATTGGGGAAAACCTCTTTAGTCTTGCTAGAGATTTAGAC
+>EAS139_19:4:26:274:1078
+GTCATCTATAAAGGAAATCCCATCAGAATAACAATGGGCT
+>EAS139_19:4:68:1122:79
+ATGGCTGATTATGAAAACAATGTTCCCCAGATACCATCCC
+>EAS139_19:4:68:1122:79
+TGAGAGATTCTGCAGCCCAGCTCCAGATTGCTTGTGGTCT
+>EAS139_19:4:69:1593:819
+ATTGAGACTACAGAGCAACTAGGTAAAAAATTAACATTAC
+>EAS139_19:4:69:1593:819
+CAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAACAGAA
+>EAS139_19:4:77:1780:693
+GGAAGTAATTGGGGAAAACCTCTTTAGTCTTGCTAGAGAT
+>EAS139_19:4:77:1780:693
+TGAAGACAAGTCTCTTATGAATTAACCCAGTCAGACAAAA
+>EAS139_19:4:78:806:800
+AAATTAAAGTTCAATACTCACCATCATAAATACACACAAA
+>EAS139_19:4:78:806:800
+AACCTGTCAAACACGAATGTTATGCCCTGCTAAACTAAGC
+>EAS139_19:5:29:411:1208
+AAAAGAATTGTAAAAGTCAAAATTAAAGTTCAATACTCAC
+>EAS139_19:5:29:411:1208
+CCCCCACTTAAGAGATATAGATTGGCAGAACAGATTTAAA
+>EAS139_19:5:40:758:116
+GTCTCTACCACAGGGGGCTGCGCGGTTTCCCATCATGAAG
+>EAS139_19:5:40:758:116
+GTGTGTTCTCATCAACCTCATACACACACATGGTTTAGGG
+>EAS139_19:5:4:939:2021
+AAATACTACTAGACCTAAGAGGGATGAGAAATTACCTAAT
+>EAS139_19:5:4:939:2021
+GGAGTAGCTATACTTATATCAGATAAAGCACACTTTAAAT
+>EAS139_19:5:57:366:844
+AAATTCCCCCACTTAAGAGATATAGATTGGCAGAACAGAT
+>EAS139_19:5:57:366:844
+TAAAAGTCAAAATTAAAGTTCAATACTCACCATCATAAAT
+>EAS139_19:5:61:1885:163
+AGCAAGAAGATATAACCATCCTACTAAATACATATGCACC
+>EAS139_19:5:61:1885:163
+ATACTTTACTGCTACTCAATATATCCATGTAACAAATCTG
+>EAS139_19:5:66:1381:181
+GGACTTCTTAAAGAAAAAAAAACCTGTCAAACACGAATGT
+>EAS139_19:5:66:1381:181
+TTGCAAGACAGACTTCATCAAGATATGTAGTCATCAGACT
+>EAS139_19:5:68:306:409
+AACTTTGAATAAAAAGGGATTAAATTCCCCCACTTAAGAG
+>EAS139_19:5:68:306:409
+CCTATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAATT
+>EAS139_19:5:70:318:1631
+TAAGAAATTACAAAATATAGTTGAAAGCTCTAACAATAGA
+>EAS139_19:5:74:668:424
+GACTTCTTAAAGAAAAAAAAACCTGTCAAACACGAATGTT
+>EAS139_19:5:74:668:424
+GTAAAAGTCAAAATTAAAGTTCAATACTCACCATCATAAA
+>EAS139_19:5:89:525:113
+GACGCTGAAGAACTTTGATTCCCTCTTCTTCCAAAGATGA
+>EAS139_19:5:89:525:113
+TATTTATGCTATTCAGTTATAAATATAGAAATTGAAACAG
+>EAS139_19:5:95:944:247
+GGTACAATGTACAATATTCTGATGATGGTTACACTAAAAG
+>EAS139_19:5:95:944:247
+GTAAAGATGTGGGGAAAAAAGTAAACTCTCAAATATTGCT
+>EAS139_19:6:21:1601:1666
+GAAAGCTCTAACAATAGACTAAACCAAGCAGAAGAAAGAG
+>EAS139_19:6:21:1601:1666
+TATTACTGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAAAA
+>EAS139_19:6:52:1455:1212
+CCATCTCTTGTAATCTCTCTCCTTTTTGCTGCATCCCTGT
+>EAS139_19:6:52:1455:1212
+TTCTTAGGGAAGAACAGCTTAGGTATCAATTTGGTGTTCT
+>EAS139_19:6:72:308:839
+AGTTACCAAATGTGTTTATTACCAGAGGGATGGAGGGAAG
+>EAS139_19:6:72:308:839
+ATCGTGGACCCTGAGAGATTCTGCAGCCCAGATCCAGATT
+>EAS139_19:6:75:1503:1399
+CAAGAAGATATAACCATCCTACTAAATACATATGCACCTA
+>EAS139_19:6:75:1503:1399
+CATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCTACGC
+>EAS139_19:6:78:1029:512
+AGATAATTCATCATCACTAAACCAGTCCTATAAGAAATGC
+>EAS139_19:6:78:1029:512
+TCAGCAGAAACCTTACAAGCCAGAAGAGATTGGATCTAAT
+>EAS139_19:6:82:1051:921
+GCAAATAGGTAAAAAATTAACATTACAACAGGAACAAAAC
+>EAS139_19:6:82:1051:921
+GGGGAAATAAAGTCAAGGCTTTCCTGACAAGCAAATGCTA
+>EAS139_19:6:84:438:1505
+ATTAATATAATTGTGTCCATGTACACACTCTGTCCTATGT
+>EAS139_19:6:84:438:1505
+GCACTAATGCGCTCCACGCCCAAGCCCTTCTCACAGTTTC
+>EAS139_19:7:44:1807:833
+ATATCCATGTAACAAATCTGCGCTTGTACTTCTAAATCTA
+>EAS139_19:7:44:1807:833
+CTCTCAAATATTGCTAGTGGGAGTATAAATTGTTTTCCAC
+>EAS139_19:7:85:262:751
+CCATGTAACAAATCTGCGCTTGTACTTCTAAATCTATAAC
+>EAS139_19:7:85:262:751
+TACAATGATAAAAAGATCAATTCAGCAAGAAGATATAACC
+>EAS139_19:7:92:288:1354
+TCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATT
+>EAS139_19:7:92:288:1354
+TGTCTTGACACCCAACTAATATTTGTCTGAGCAAAACAGT
+>EAS188_4:5:103:870:105
+AAACTAAGCATCATAAATGAAGGGGAAATAAAGTC
+>EAS188_4:5:103:870:105
+ATAAAACAATTAATTGAGACTACAGAGCAACTAGG
+>EAS188_4:5:166:776:590
+CTTGAAAGCTTGGGCTGTAATGATGCCCCTTGGCC
+>EAS188_4:5:166:776:590
+TAATTGTGTCCATGTACACACGCTGTCCTATGTAC
+>EAS188_4:5:202:326:680
+ACTTATCATGACTCTATCCCAAATTCTCAATTACG
+>EAS188_4:5:202:326:680
+GTAATGATGCCCCTTGGCCATCACCCGGTCCCTGC
+>EAS188_4:5:295:547:216
+ATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCT
+>EAS188_4:5:295:547:216
+TAAAAAATTAACATTACAACAGGAACAAAACCTCA
+>EAS188_4:5:302:997:951
+ATGAACTTCTGTAATTGAAAAATTCATTTAAGAAA
+>EAS188_4:5:308:552:77
+TTTTCTTTTTTTTCTTTTCTCTTTTTTTTTTTTTT
+>EAS188_4:5:8:377:655
+CTATTTTTGTCTTGACACCCAACTAATATTTGTCT
+>EAS188_4:5:8:377:655
+CTGCCCCATCTCTTGTAATCTCTCTCCTTTTTGCT
+>EAS188_4:7:282:567:481
+GGAACAGGGAGGCGCACTAATGCGCTCCACGCCCA
+>EAS188_4:7:282:567:481
+TGCAATTAATATAATTGTGTCCACGTACACACGCT
+>EAS188_4:7:296:401:60
+AATGAAAGAGGCTCAAAGAATGCCAGGAAGATACA
+>EAS188_4:7:35:408:348
+AAGAAACGCGTAACTGCGCTCTCATACACTCCAGC
+>EAS188_4:7:35:408:348
+GGTTCTCAAGGTTGTTGCAATGGGGTCTATGTGAA
+>EAS188_4:7:78:583:670
+CAGGGAATTTGTCAATGTCAGGGAAGGAGCATTTT
+>EAS188_4:7:78:583:670
+TAACCTCTGGTGACTGCCAGAGCTGCTGGCAAGCT
+>EAS188_4:7:96:899:106
+TCTATAAAGGAAATCCCATCAGAATAACAATGGGC
+>EAS188_4:7:96:899:106
+TTCCTGACAAGCAAATGCTAAGATAATTCATCATC
+>EAS188_7:1:115:683:296
+AACAGTCTAGATGAGAGAGAACTTCCCTGGAGGTC
+>EAS188_7:1:115:683:296
+CTCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTC
+>EAS188_7:1:177:522:118
+TAAACTAAGCATCATAAATGAAGGGGAAATAAAGT
+>EAS188_7:1:177:522:118
+TCTCAATTAATTGTTTTATAAAACCTGTGAGTTTT
+>EAS188_7:1:290:286:763
+TTAAAATTTAACAAAAGTAAATAAAACACATAGCT
+>EAS188_7:1:290:286:763
+TTTTTTTTTTTTCTTTTCTCTTTTTTTTTTTTTTT
+>EAS188_7:1:316:949:122
+TCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCT
+>EAS188_7:1:316:949:122
+TTAGGGAAGAACAGCTTAGGTATCAATTTGGTGTT
+>EAS188_7:1:77:251:446
+CAGCATGGTTGTACTGGGCAATACATGAGATTATT
+>EAS188_7:1:77:251:446
+TTATCATGACTCTATCCCAAATGCCCAATTACGTC
+>EAS188_7:2:152:765:744
+ACTTCCCTGGAGGTCTGATGGCGTTTCTCCCTCGT
+>EAS188_7:2:152:765:744
+TTTTCTGTTTCTTTGTTTGATTTGGTGGAAGACAT
+>EAS188_7:2:172:622:707
+ACATGGCTGATTATGAAAACAATGTTCCCCAGATA
+>EAS188_7:2:172:622:707
+TTCTTCATCCTGGACCCTGAGAGATTCTGCAGCCC
+>EAS188_7:2:187:227:818
+CAGCACAGGGGCTTAACCTCTGGTGACTGCCAGAG
+>EAS188_7:2:187:227:818
+CCCCCTTGCAACAACCTTGAGAACCCCAGGGAATT
+>EAS188_7:2:19:736:559
+AAGACTTCATCAAGATATGTAGTCATCAGACTATC
+>EAS188_7:2:19:736:559
+TGAACCTATGAGTCACAGGTATTCCTGAGGAAAAA
+>EAS188_7:2:218:877:489
+TATAGAAATTGAAACAGCTGTGTTTAGTGCCTTTG
+>EAS188_7:2:259:219:114
+GAAATTGAAACAGCTGTGTTTAGTGCCTTTGTTCA
+>EAS188_7:2:259:219:114
+TTGATGCCCTCTTCTTCCAAAGATGAAACGCGTAA
+>EAS188_7:3:100:735:530
+GCACACTTTAAATCAACAACAGTAAAATAAAACAA
+>EAS188_7:3:100:735:530
+TGATGATGGTTACACTAAAAGCCCATACTTTACTG
+>EAS188_7:3:101:572:491
+CAACAGGAAGAAAAGGTCTTTCAAAAGGTGATGTG
+>EAS188_7:3:101:572:491
+TTCCAAAGATGAAACGCGTAACTGCGCTCTCATTC
+>EAS188_7:3:13:122:187
+AGCATTTTGTCAGTTACCAAATGTGTTTATTACCA
+>EAS188_7:3:13:122:187
+GACTGCCAGAGCTGCTGGCAAGCTAGAGTCCCCTT
+>EAS188_7:3:15:568:42
+TTTTTGAGGAAGTAATTGGGGAAAACCTCTTTAGT
+>EAS188_7:3:15:568:42
+TTTTTTTTTTTGTTTTTTTTTTTTTTTTTTTTATA
+>EAS188_7:3:182:104:921
+ATCAAGATATGTAGTCATCAGACTATCTAAAGTCA
+>EAS188_7:3:182:104:921
+CACGAATGTTATGCCCTGCTAAACTAAGCATCATA
+>EAS188_7:3:200:712:439
+CGTCACCCGGTCCCTGCCCCATCTCTTGTAATCTC
+>EAS188_7:3:200:712:439
+GTTGGGAGATTTTTAATGATTCCTCAATGTTAAAA
+>EAS188_7:3:296:224:724
+ATAGTTGAAAGCTCTAACAATAGACTAAACCAAGC
+>EAS188_7:3:296:224:724
+TGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAAAAC
+>EAS188_7:3:76:333:905
+AATTGTGTCCATGTACACACGCTGTCCTATGTACT
+>EAS188_7:3:76:333:905
+TTTCTGCCCCCAGCATGGTTGTACTGGGCAATACA
+>EAS188_7:4:164:719:947
+AAATTAACATTACAACAGGAACAAAACCTCATATA
+>EAS188_7:4:164:719:947
+ACGCAAACAGAAACCAAATGAGAGAAGGAGTAGCT
+>EAS188_7:4:171:104:398
+AGGGAGGTGCACTAATGCGCTCCACGCCCAAGCCC
+>EAS188_7:4:171:104:398
+CAATTAATATAATTGTGTCCATGTACACACGCTGT
+>EAS188_7:4:21:443:404
+AGAGAACTTCCCTGGAGGTCTGATGGCGTTTCTCC
+>EAS188_7:4:21:443:404
+TTCCTCTGTCTTGATTTACTTGTTGTTGGTTTTCT
+>EAS188_7:4:238:441:727
+GTAATTGAAAAATTCATTTAAGAAATTACAAAATA
+>EAS188_7:4:259:869:641
+GTTGGGAGATTTTTAATGATTCCTCAATGTTAAAA
+>EAS188_7:4:259:869:641
+TGATGCCCCTTGGCCATCACCCAGTCCCTGCCCCA
+>EAS188_7:4:92:693:228
+AAGGTTTTATAAAAAAATTAATTGAGACTACAGAG
+>EAS188_7:4:92:693:228
+AGCATCATAAATGAAGGGGAAATAAAGTCAAGTCT
+>EAS188_7:5:112:51:128
+AGACCCCCTTGCAACAACCTTGAGAACCCCAGGGA
+>EAS188_7:5:112:51:128
+CCCAATGGACCTGTGATATCTGGATTCTGGGAAAT
+>EAS188_7:5:115:249:673
+TAAAGAAAAAAAAACCTGTCAAACACGAATGTTAT
+>EAS188_7:5:115:249:673
+TAAATACACACAAAAGTACAAAACTCACAGGTTTT
+>EAS188_7:5:163:982:695
+CTCAGCAGAAACCTTACAAGCCAGAAGAGATTGGA
+>EAS188_7:5:163:982:695
+TCAAAGAATGCCAGGAAGATACATTGCAAGACAGA
+>EAS188_7:5:308:354:124
+GCTGCATCCCTGTCTTCCTCTGTCTTGATTTACTT
+>EAS188_7:5:308:354:124
+TGAGCAAAACAGTCTAGATGAGAGAGAACTTCCCT
+>EAS188_7:5:74:329:459
+ACAGAGCAACTAGGTAAAAAATTAACATTACAACA
+>EAS188_7:5:74:329:459
+TAAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAG
+>EAS188_7:6:107:447:488
+TAATTGGTACAATGTACAATATTCTGATGATGGTT
+>EAS188_7:6:107:447:488
+TGCTAAGTATTGGTAAAGATGTGGGGAAAAAAGTA
+>EAS188_7:6:107:636:642
+AAGTCTTTCCTGACAAGCAAATGCTAAGATAATTC
+>EAS188_7:6:107:636:642
+GTCATCTATAAAGGAAATCCCATCAGAATAACAAT
+>EAS188_7:6:11:994:584
+GAAGGAAAAAAATTCTAAAATCAGCAAGAGAAAAG
+>EAS188_7:6:11:994:584
+GGTATTCCTGAGGAAAAAGAAAAAGTGAGAAGTTT
+>EAS188_7:6:191:540:493
+AAAAGCCCATACTTTACTGCTACTCAATATATCCA
+>EAS188_7:6:191:540:493
+GTGGGGAAAAAAGTAAACTCTCAAATATTGCTAGT
+>EAS188_7:6:194:998:663
+ACCTAACACAAGACTACCCAGATTCATAAAACAAA
+>EAS188_7:6:194:998:663
+TCTACGCAAACAGAAACCAAATGAGAGAAGGAGTA
+>EAS188_7:6:205:873:464
+AGATTGCTTGTGGTCTGACAGGCTGCAACTGTGAG
+>EAS188_7:6:205:873:464
+CCCCAGAGGGAAAGCTTTCAACGCTTCTAGCCATT
+>EAS188_7:6:46:122:479
+AAGAAAGAGGTTCAGAACTTGAAGACAAGTCTCTT
+>EAS188_7:6:46:122:479
+AAGTGAGAAGTTTGGAAGAACTATTTGAGGAAGTA
+>EAS188_7:7:19:886:279
+CCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGTA
+>EAS188_7:7:19:886:279
+GAAAGGAGCATTTTGTCAGTTACCAAATGTGTTTA
+>EAS188_7:7:213:309:373
+TAGACATCTAAATGAAAGAGGCTCAAAGAATGCCA
+>EAS188_7:7:213:309:373
+TTAAAAATGAACAGAGCTTTCAAGAAGTATGAGAT
+>EAS188_7:7:243:876:758
+AGCCCAGATCCAGATTGCTTGTGGTCTGACAGGCT
+>EAS188_7:7:243:876:758
+CCCAGATACCATCCCTGTCTTACTTCCAGCTCCCC
+>EAS188_7:7:67:719:786
+GGATGAGAAATTACCTAATTGGTACACTGTACAAT
+>EAS188_7:7:67:719:786
+TAAAAAAAAAAAAGCAAAAACAAAAACTATGCTAA
+>EAS188_7:8:60:182:718
+GTCCCTGCCCCATCTCTTGTAATCTCTCTCCTTTT
+>EAS188_7:8:60:182:718
+TGACACCCAACTAATATTTGTCTGAGCAAAACAGT
+>EAS188_7:8:64:350:174
+CCTAACACAAGACTACCCAGATTCATAAAACAAAT
+>EAS188_7:8:64:350:174
+GTTCTACGCAAACAGAAACCAAATGAGAGAAGGAG
+>EAS192_3:1:114:19:769
+AAACACGAATGTTATGCCCTGCTAAACTAAGCATC
+>EAS192_3:1:114:19:769
+TAAAGTCAACATGAAGGAAAAAAATTCTAAAATCA
+>EAS192_3:1:225:195:543
+AACAACCTTGAGAACCCCAGGGAATTTGTCAATGT
+>EAS192_3:1:225:195:543
+GGTTGCCAGCACAGGGGCTTAACCTCTGGTGACTG
+>EAS192_3:3:194:378:230
+AATACTACTAGACCTAAGAGGGATGAGAAATTACC
+>EAS192_3:3:194:378:230
+ATGAGAGAAGGAGTAGCTATACTTATATCAGATAA
+>EAS192_3:3:221:881:916
+TAACACAAGACTACCCAGATTCATAAAACAAATAC
+>EAS192_3:3:221:881:916
+TAATTCTAAATCTAGAACAAAATTAAAATTTAACA
+>EAS192_3:3:257:611:440
+ACCCTGAGAGATTCTGCAGCCCAGATCCAGATTGC
+>EAS192_3:3:257:611:440
+GGAGCATTTTGTCAGTTACCAAATGTGTTTATTAC
+>EAS192_3:3:27:973:518
+CTGATTATGAAAACAATGTTCCCCAGATACCATCC
+>EAS192_3:3:27:973:518
+TCCAGATTGCTTGTGGTCTGACAGGCTGCAACTGT
+>EAS192_3:3:285:349:797
+ATCATAAATGAAGGGGAAATAAAGTCAAGTCTTTC
+>EAS192_3:3:285:349:797
+GTTTTAAAAAACCAATAATTGAGACTACAGAGCAA
+>EAS192_3:3:309:187:267
+ATTGAGACTACAGAGCAACTAGGTAAAAAATTAAC
+>EAS192_3:3:309:187:267
+GGGGAAATAAAGTCAAGTCTTTCCTGACAAGCAAA
+>EAS192_3:3:88:866:774
+ATGTAACAAATCTGCTCTTGTACTTCTAAATCTAT
+>EAS192_3:3:88:866:774
+TTGTTTTCCACTTTGGAAAACAATTTGGTAATTTC
+>EAS192_3:4:184:237:476
+ATCATACAATGATAAAAAGATCAATTCAGCAAGAA
+>EAS192_3:4:184:237:476
+ATGAACTAACTATATGCTGTTTACAAGAAACTCAT
+>EAS192_3:4:255:549:422
+AAGTCATCTATAAAGGAAATCCCATCAGAATAACA
+>EAS192_3:4:255:549:422
+CTCTTTAGTCTTGCTAGAGATTTAGACATCTAAAT
+>EAS192_3:4:293:168:240
+ATGAACAGAGCTTTCAAGAAGTATGAGATTATGTA
+>EAS192_3:4:293:168:240
+CAAAGAATGCCAGGAAGATACATTGCAAGACAGAC
+>EAS192_3:4:312:915:751
+AATAAAGTCAAGTCTTTCCTGACAAGCAAAAGCTA
+>EAS192_3:4:312:915:751
+ATCTATAAAGGAAATCCCATCAGAATAACAATGGG
+>EAS192_3:4:63:5:870
+AAAGAAAAAAGAATTTTAAAAATGAACAGAGCTTT
+>EAS192_3:4:63:5:870
+GAAAGAGGCTCAAAGAATGCCAGGAAGATACATTG
+>EAS192_3:5:197:914:256
+ACTAGACCTAAGAGGGATGAGAAATTACCTAATTG
+>EAS192_3:5:197:914:256
+TCAGATAAAGCACACTTTAAATCAACAACAGTAAA
+>EAS192_3:5:223:142:410
+CTATTCAGTTCTAAATATAGAAATTGAAACAGCTG
+>EAS192_3:5:27:577:849
+AGTCTCTTATGAATTAACCCAGTCAGACAAAAATA
+>EAS192_3:5:27:577:849
+TTAGACATCTAAATGAAAGAGGCTCAAAGAATGCC
+>EAS192_3:5:287:334:110
+GATGAATACTAAGATTGATGTAGCAGCTTTTGCAA
+>EAS192_3:5:287:334:110
+TATGTAAAGTAACTGAACCTATGAGTCACAGGTAT
+>EAS192_3:6:116:464:261
+CAATGATAAAAAGATCAATTCAGCAAGAAGATATA
+>EAS192_3:6:116:464:261
+CTATATGCTGTTTACAAGAAACTCATTAATAAAGA
+>EAS192_3:6:170:169:57
+GGCTTGACCTCTGGTGACTGCCAGAGCTGCTGGCC
+>EAS192_3:6:170:169:57
+TGCAACAACCTTGAGAACCCCAGGGAATTTGTCAA
+>EAS192_3:6:175:437:950
+CCAAATTCCCAATTACGTCCTATCTTCTTCTTAGG
+>EAS192_3:6:175:437:950
+CCTGCCCCATCTCTTGTAATCTCTCTCCTTTTTGC
+>EAS192_3:6:185:868:496
+CCCTTGGCCATCACCCAGTCCCTGCCCCATCTCTT
+>EAS192_3:6:185:868:496
+GATTTTTAATGATTCCTCAATGTTAAAATGTCTAT
+>EAS192_3:6:201:195:757
+CCTGCCCCATCTCTTGTAATCTCTCTCCTTTTTGC
+>EAS192_3:6:201:195:757
+TATGTACTTATCATGACTCTATCCCAAATTCCCAA
+>EAS192_3:6:216:292:528
+GGGAGATTTTTAATGATTCCTCAATGTTAAAATGT
+>EAS192_3:6:216:292:528
+TAATGATGCCCCTTGGCCATCACCCAGTCCCTGCC
+>EAS192_3:6:235:505:553
+GGTGGAAAAAGATGTTCTACGCAAACAGAAACCAA
+>EAS192_3:6:235:505:553
+GTAAAAAATTAACATTACAACAGGAACAAAACCTC
+>EAS192_3:6:326:887:180
+CTAAACCAGTCCTATAAGAAATGCTCAAAAGAATT
+>EAS192_3:6:326:887:180
+TGGGCTTCTCAGCAGAAACCTTACAAGCCAGAAGC
+>EAS192_3:6:45:183:25
+CAGCGGAAACCTTACAAGCCAGAAGAGATTGGATC
+>EAS192_3:6:45:183:25
+CTAAACCAGTCCTATAAGAAATGCTCAAAAGAATT
+>EAS192_3:7:149:354:667
+CTTCCAGCTCCCCAGAGGGAAAGCTTTCAACGCTT
+>EAS192_3:7:149:354:667
+GAGGTGCACTAATGCGCTCCACGCCCAAGCCCTTC
+>EAS192_3:7:298:644:697
+CTATGTTTCTTATCTGCACATTACTACCCTGCAAT
+>EAS192_3:7:298:644:697
+TCCACGTCTCATCTAGGGGAACAGGGAGGTGCACT
+>EAS192_3:7:66:891:294
+AGCACACTTTAAATCAACAACAGTAAAATAAAACA
+>EAS192_3:7:66:891:294
+TAATTGGTACAATGTACAATATTCTGATGATGGTT
+>EAS192_3:7:78:692:671
+AATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACA
+>EAS192_3:7:78:692:671
+CAGCAAGAGAAAAGCATACAGTCATCTATAAAGGA
+>EAS192_3:7:93:945:176
+CCAGAGGGAAAGCTTTCAACGCTTCTAGCCATTTC
+>EAS192_3:7:93:945:176
+GTGAGCCATCACAATGAACAACAGGAAGAAAAGGT
+>EAS192_3:8:6:104:118
+AAGAATTTTAAAAATGAACAGAGCTTTCAAGAAGT
+>EAS192_3:8:6:104:118
+TCTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGA
+>EAS192_3:8:6:237:885
+AAAATCAGCAAGAGAAAAGCATACAGTCATCTATA
+>EAS192_3:8:6:237:885
+TATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGT
+>EAS1_103:1:151:159:43
+AACCTCATACACACACATGGTTTAGGGGTATAATA
+>EAS1_103:1:151:159:43
+TCTCATTCACTCCAGCTCCCTGTCACCCAATGGAC
+>EAS1_103:1:228:736:747
+AAGATATGTAGTCATCAGACTATCTAAAGTCAACA
+>EAS1_103:1:228:736:747
+TTGGACTTCTTAAAGAAAAAAAAACCTGTCAAACA
+>EAS1_103:1:274:176:479
+CCTATCTTCTTCTTAGGGAAGAACAGCTTAGGTAT
+>EAS1_103:1:274:176:479
+GAAATGCTTTACTGTCATAACTATGAAGAGACTAT
+>EAS1_103:1:2:831:692
+GTGTTCTCATCAACCTCATACACACACATGGTTTA
+>EAS1_103:1:2:831:692
+TCCAGCTCCCTGTCACCCAATGGACCTGTGATATC
+>EAS1_103:2:184:980:396
+AAGGAGGTCATCATACAATGATAAAAAGATCAATT
+>EAS1_103:2:184:980:396
+ACATGAACTAACTATATGCTGTTTACAAGAAACTC
+>EAS1_103:2:226:302:758
+GGAAAGCTTTCAACGCTTCTAGCCATTTCTTTTGG
+>EAS1_103:2:226:302:758
+TGCTTGTGGTCTGACAGGCTGCAACTTTGAGCGNT
+>EAS1_103:2:234:167:381
+AAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGAT
+>EAS1_103:2:234:167:381
+AGAGAAAAGCATACAGTCATCTATAAAGGAAATCC
+>EAS1_103:2:235:805:373
+TATCTTCTTCTTAGGGAAGAACAGCTTAGGTATCA
+>EAS1_103:2:235:805:373
+TTTACTGTCATAACTATGAAGAGACTATTTCCAGA
+>EAS1_103:2:307:252:632
+ACCATCCTGCTAAATACATATGCACCTAACACAAG
+>EAS1_103:2:307:252:632
+ATGTTCTACGCAAACAGAAACCAAATGAGAGAAGG
+>EAS1_103:3:253:175:31
+CAAACAGAAACCAAATGAGAGAAGGAGTAGCTATA
+>EAS1_103:3:253:175:31
+TTCATAAAACAAATACTACTAGACCTAAGAGGGAT
+>EAS1_103:3:277:921:474
+AAAATATAGTTGAAAGCTCTAACAATAGACTAAAC
+>EAS1_103:3:277:921:474
+AAAGAAAAAGTGAGAAGTTTGGAAAAACTATTTGA
+>EAS1_103:3:320:505:814
+ACAGTCTAGATGAGAGAGAACTTCCCTGGAGGTCT
+>EAS1_103:3:320:505:814
+CTGTCTTGATTTACTTGTTGTTGGTTTTCTTTTTC
+>EAS1_103:3:323:196:855
+ACAAGCAAATGCTAAGATAATTCATCATCACTAAA
+>EAS1_103:3:323:196:855
+TAAAAAATTAACATTACAACAGGAACAAAACCTCA
+>EAS1_103:3:41:474:283
+TGAACCACACATTAATACTATGTTTCTTATCTGCA
+>EAS1_103:3:41:474:283
+TTTGGTGTTCTGTGTAAAGTCTCAGGGAGCCGTCC
+>EAS1_103:4:143:560:194
+GTTGTACTGGGCAATACATGAGATTATTAGGAAAT
+>EAS1_103:4:143:560:194
+TCTAGCCATTTCTTTTGGCATTTGCCTTCAGACCC
+>EAS1_103:4:164:79:134
+ACAATGGGCTTCTCAGCGGAAACCTTACAAGCCAG
+>EAS1_103:4:164:79:134
+AGATAATTCATCATCACTAAACCAGTCCTATAAGA
+>EAS1_103:4:231:815:626
+GATCAATACAGCAAGAAGATATAACCATCCTACTA
+>EAS1_103:4:231:815:626
+GCTGTTTACAAGAAACTCATTAATAAAGACATGAG
+>EAS1_103:4:235:899:847
+AGAAACTCATTAATAAAGACATGAGTTCAGGTAAA
+>EAS1_103:4:235:899:847
+ATAAAAAGATCAATTCAGCAAGAAGATATAACCAT
+>EAS1_103:4:294:525:849
+AGGAGCATTTTGTCAGTTACCAAATGTGTTTATTA
+>EAS1_103:4:294:525:849
+CTGGCAAGCTAGAGTCCCATTTGGAGCCCCTCTAA
+>EAS1_103:4:61:433:385
+ACTGTGAGCCATCACAATGAACAACAGGAAGAAAA
+>EAS1_103:4:61:433:385
+GGATGGAGGGAAGAGGGACGCTGAAGCACTTTGAT
+>EAS1_103:5:141:711:813
+TATTACCAGAGGGATGGAGGGAAGAGGGACGCTGA
+>EAS1_103:5:141:711:813
+TATTTGTAATGAAAACTATATTTATGCTATTCAGT
+>EAS1_103:5:188:20:592
+CTACTAGACCTAAGAGGGATGAGAAATTACCTAAT
+>EAS1_103:5:188:20:592
+GAGTAGCTATACTTATATCAGATAAAGCACACTTT
+>EAS1_103:5:285:241:560
+GAAATTACAAAATATAGTTGAAAGCTCTAACAATA
+>EAS1_103:5:285:241:560
+TAAAGTAACTGAACCTATGAGTCACAGGTATTCCT
+>EAS1_103:5:319:165:698
+ATCACCCAGTCCCTGCCCCATATCTTGTAATCTCT
+>EAS1_103:5:319:165:698
+TGACACCCAACTAATATTTGTCTGAGCAAAACAGT
+>EAS1_103:6:7:858:437
+CAATGAACAACAGGAAGAAAAGGTCTTTCAAAAGG
+>EAS1_103:6:7:858:437
+CCATTTCTTTTGGCATTTGCCTTCAGACCCTACAC
+>EAS1_103:7:112:578:782
+AAAATAAAACACATAGCTAAAACTAAAAAAGCAAA
+>EAS1_103:7:112:578:782
+CAGATTCATAAAACAAATACTACTAGACCTAAGAG
+>EAS1_103:7:139:578:951
+AAAAACATGAACTAACTATATGCTGTTTACAAGAA
+>EAS1_103:7:139:578:951
+GAGGTCATCATACAATGATAAAAAGATCAATTCAG
+>EAS1_103:7:166:84:766
+ATAACACAAGACTACCCAGATTCATAAAACAAATA
+>EAS1_103:7:166:84:766
+GGAAAAAGATGTTCTACGCAAACAGAAACCAAATG
+>EAS1_103:7:311:100:539
+AAATTCTTCATCCTGGACCCTGAGAGATTCTGCAG
+>EAS1_103:7:311:100:539
+CAGTTACCAAATGTGTTTATTACCAGAGGGATGGA
+>EAS1_103:7:313:83:546
+TCATTCACTCCAGCTCCCTGTCACCCAATGGACCT
+>EAS1_103:7:313:83:546
+TGCAACAACCTTGAGAACCCCAGGGAATTTGTCAA
+>EAS1_103:7:53:783:78
+AAAAACCTGTCAAACACGAATGTTATGCCCTGCTA
+>EAS1_103:7:53:783:78
+TGCAAGACAGACTTCATCAAGATATGTAGTCATCA
+>EAS1_105:1:115:226:443
+AAATCTGCGCTTGTACTTCTAAATCTATAAAAAAA
+>EAS1_105:1:115:226:443
+ATATAACCATCCTACTAAATACATATGCACCTAAC
+>EAS1_105:1:141:415:738
+AAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGA
+>EAS1_105:1:141:415:738
+TTACCTAGTTGCTCTGTAGTCTCAATTAATTGTTT
+>EAS1_105:1:234:185:359
+AAGGAGTAGCTATACTTATATCAGATAAAGCACAC
+>EAS1_105:1:234:185:359
+CAGATTCATAAAACAAATACTACTAGACCTAAGAG
+>EAS1_105:1:28:745:352
+ATATGCACCTAACACAAGACTACCCAGATTCATAA
+>EAS1_105:1:28:745:352
+CTTCTAAATCTATAACAAAATTAAAATTTAACAAA
+>EAS1_105:1:297:283:948
+CCAGATTGCTTGTGGTCTGACAGGCTGCAACTGTG
+>EAS1_105:1:297:283:948
+TGTCTTACTTCCAGCTCCCCAGAGGGAAAGCTTTC
+>EAS1_105:1:329:407:872
+TTCGTTTTTTTTTTTTTTTTTTCCCTTTTTTTTTT
+>EAS1_105:1:3:903:957
+AAGCCAGAAGAGATTGGATCTAATTTTTGGACTTC
+>EAS1_105:1:3:903:957
+ATTCATCATCACTAAACCAGTCCTATAAGAAATGC
+>EAS1_105:1:45:239:851
+CTAAGAGGGATGAGAAATTACCTAATTGGTACAAT
+>EAS1_105:1:45:239:851
+TGAGAGAAGGAGTAGCTATACTTATATCAGATAAA
+>EAS1_105:1:87:430:995
+GAAAAGAGTTAAAAACATGAACTAACTATATGCTG
+>EAS1_105:1:87:430:995
+TACTCACCATCATAAATACACACAAAATTACAAAA
+>EAS1_105:2:110:584:649
+CCATGTACACACGCTGTCCTATGTACTTATCATGA
+>EAS1_105:2:110:584:649
+CTGTAATGATGCCCCTTGGCCATCACCCGGTCCCT
+>EAS1_105:2:146:374:692
+AAATGCTCAAAAGAATTGTAAAAGTCAAAATTAAA
+>EAS1_105:2:146:374:692
+ATTAAATTCCCCCACTTAAGAGATATAGATTGGCA
+>EAS1_105:2:179:532:82
+CCATCACCCGGTCCCTGCCCCATCTCTTGTAATCT
+>EAS1_105:2:179:532:82
+TGTACTTATCATGACTCTATCCCAAATTCCCAATT
+>EAS1_105:2:280:662:939
+AAGAGAAAAGCATACAGTCATCTATAAAGGAAATC
+>EAS1_105:2:280:662:939
+CCTCTTTAGTCTTGCTAGAGATTTAGACATCTAAA
+>EAS1_105:2:299:360:220
+ATTCAGTTCTAAATATAGAAATTGAAACAGCTGTG
+>EAS1_105:2:299:360:220
+GAAGAACTTAGATGCCCTCTTCTTCCAAAGATGAA
+>EAS1_105:2:301:161:195
+ACAGTAAAATAAAACAAAGGAGGTCATCATACAAT
+>EAS1_105:2:301:161:195
+GTACAATGTACAATATTCTGATGATGGTTACACTA
+>EAS1_105:3:176:431:647
+ATCATGACTCTATCCCAAATTCCCAATTACGTCCT
+>EAS1_105:3:176:431:647
+CCATCACCCAGTCCCTGCCCCATCTCTTGTAATCT
+>EAS1_105:3:182:404:693
+ACCTCATACACACACATGGTTTAGGGGTATAATAC
+>EAS1_105:3:182:404:693
+GCGTCTCTACCACAGGGGGCTGCGCGGTTTCCCAT
+>EAS1_105:3:232:364:583
+CAATTAATTGAGACTACAGAGCAACTAGGTAAAAA
+>EAS1_105:3:232:364:583
+TCATTAATAAAGACATGAGTTCAGGTAAAGGGGTG
+>EAS1_105:3:308:66:538
+CGCTTGTACTTCTAAATCTATAACAAAATTAAAAT
+>EAS1_105:3:308:66:538
+TATAACCATCCTACTAAATACATATGCACCTAACA
+>EAS1_105:3:329:177:267
+CATGAGATTATTAGGAAATGCTTTACTGTCATAAC
+>EAS1_105:3:329:177:267
+TACGTCCTATCTTCTTCTTAGGGAAGAACAGCTTA
+>EAS1_105:3:7:35:528
+TTTTTTTTTTGTTCTTTACTCTTTTTTTTTTTTTT
+>EAS1_105:3:86:823:683
+ACTTTGATGCCCTCTTCTTCCAAAGATGAAACGCG
+>EAS1_105:3:86:823:683
+CAGTTCTAAATATAGAAATTGAAACAGCTGTGTTT
+>EAS1_105:6:134:853:558
+AATATAATTGTGTCCATGTACACACGCTGTCCTAT
+>EAS1_105:6:134:853:558
+GGAACAGGGAGGTGCACTAATGCGCTCCACGCCCA
+>EAS1_105:6:162:594:858
+CTACCACAGGGGGCTGCGCGGTTTCCCATCATGAA
+>EAS1_105:6:162:594:858
+GTGTTCTCATCAACCTCATACACACACATGGTTTA
+>EAS1_105:6:172:827:592
+AATTGTAAAAGTCAAAATTAAAGTTCAATACTCAC
+>EAS1_105:6:172:827:592
+TAGATTGGCAGAACAGATTTAAAAACATGAACTAA
+>EAS1_105:6:23:885:274
+ACAAAGGAGGTCATCATACAATGATAAAAAGATCA
+>EAS1_105:6:23:885:274
+CTACTACTCAATATATCCATGTAACAAATCTGCGC
+>EAS1_105:6:267:953:459
+ACCTTACAAGCCAGAAGAGATTGGATCTAATTTTT
+>EAS1_105:6:267:953:459
+CATCACTAAACCAGTCCTATAAGAAATGCTCAAAA
+>EAS1_105:7:110:355:323
+ACCTTGAGAACCCCAGGGAATTTGTCAATGTCAGG
+>EAS1_105:7:110:355:323
+CCCAATGGACCTGTGATATCTGGATTCTGGGAAAT
+>EAS1_105:7:168:247:414
+AAAAGAATTGTAAAAGTCAAAATTAAAGTTCAATA
+>EAS1_105:7:168:247:414
+TAAATTCCCCCACTTAAGAGATATAGATTGGCAGA
+>EAS1_105:7:289:472:86
+ATGAACTTCTGTAATTGAAAAATTCATTTAAGAAA
+>EAS1_105:7:45:462:455
+TCCCTGTCTTACTTCCAGCTCCCCAGACGGAACGC
+>EAS1_105:7:45:462:455
+TCTAGGGGAACAGGGAGGTGCACTAATGCGCTCCA
+>EAS1_105:7:57:722:347
+ACAACAGGAAGAAAAGGTCTTTCAAAAGGTGATGT
+>EAS1_105:7:57:722:347
+CGCGTAACTGCGCTCTCATTCACTCCAGCTCCCTG
+>EAS1_105:8:160:130:351
+CAAAAGAATTGTAAAAGTCAAAATTAAAGTTCAAT
+>EAS1_105:8:160:130:351
+TGGATCTAATTTTTGGACTTCTTAAAGAAAAAAAA
+>EAS1_105:8:179:119:876
+ATAAAAAGATCAATTCAGCAAGAAGATATAACCAT
+>EAS1_105:8:179:119:876
+TTAAAAACATGAACTAACTATATGCTGTTTACCAG
+>EAS1_105:8:24:718:322
+AACTCATTAATAATGTCATGAGTTCAGGTAAAGGG
+>EAS1_105:8:24:718:322
+ACAATTAATTGAGACTACAGACCAATTATGTAAAA
+>EAS1_105:8:254:617:73
+AACTATTTGAGGAAGTAATTGGGGAAAACCTCTTT
+>EAS1_105:8:254:617:73
+GCAGAAGAAAGAGGTTCAGAACTTGAAGACAAGTC
+>EAS1_105:8:256:404:584
+ACTGTCCTATGTACTTATCATGACTCTATCCCAAA
+>EAS1_105:8:256:404:584
+GTTTCTGCCCCCAGCATGGTTGTACTGGGCAATAC
+>EAS1_105:8:96:720:940
+AAAGGAAATCCCATCAGAATAACAATGGGCTTCTC
+>EAS1_105:8:96:720:940
+TAAGATAATTCATCATCACTAAACCAGTCCTATAA
+>EAS1_108:1:111:796:737
+ATCATGACTCTATCCCAAATTCCCAATTACGTCCT
+>EAS1_108:1:111:796:737
+CCCCAGCATGGTTGTACTGGGCAATACATGTGATT
+>EAS1_108:1:131:518:588
+AAAATCAGCAAGAGAAAAGCATACAGTCATCTATA
+>EAS1_108:1:131:518:588
+ATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTG
+>EAS1_108:1:148:286:316
+AGAACTTCCCTGGAGGTCTGATGGCGTTTCTCCCT
+>EAS1_108:1:148:286:316
+CCTCTGTCTTGATTTACTTGTTGTTGGTTTTCTGT
+>EAS1_108:1:155:809:543
+AAATTTAACAAAAGTAAATAAAACACATAGCTAAA
+>EAS1_108:1:155:809:543
+TACATATGCACCTAACACAAGACTACCCAGATTCA
+>EAS1_108:1:16:438:245
+TTGAATAAAAAGGGATTAAATTCCCCCACTTAAGA
+>EAS1_108:1:189:863:213
+CTACTAGACCTAAGAGGGATGAGAAATTACCTAAT
+>EAS1_108:1:189:863:213
+TATACTTATATCAGATAAAGCACACTTTAAATCAA
+>EAS1_108:1:242:419:512
+AATGGGCTTCTCAGCAGAAACCTTACAAGCCAGAA
+>EAS1_108:1:242:419:512
+CTAAACCAGTCCTATAAGAAATGCTCAAAAGAATT
+>EAS1_108:1:277:194:143
+TGGGCTGTAATGATGCCCCTTGGCCATCACCCGGT
+>EAS1_108:1:277:194:143
+TTTTTAATGATTCCTCAATGTTAAAATGTCTATTT
+>EAS1_108:1:328:614:638
+AAAGGTTGTTGGGAGATTTTTAATGATTCCTCAAT
+>EAS1_108:1:328:614:638
+ACTGGTTCTCTTGAAAGCTTGGGCTGTAATGATGC
+>EAS1_108:1:33:779:821
+AATGTTATGCCCTGCTAAACTAAGCATCATAAATG
+>EAS1_108:1:33:779:821
+TGAAGGAAAAAAATTCTAAAATCAGCAAGAGAAAA
+>EAS1_108:1:49:911:980
+ACAATGTACAATATTCTGATGATGGTTACACTAAA
+>EAS1_108:1:49:911:980
+GGGGAAAAAAGTAAACTCTCAAATATTGCTAGTGG
+>EAS1_108:1:65:787:74
+TGTAATGAAAACTATATTTATGCTATTCAGTTCTA
+>EAS1_108:2:102:543:160
+CAACAGGAACAAAACCTCATATATCAATATTAACT
+>EAS1_108:2:102:543:160
+CAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAA
+>EAS1_108:2:116:966:193
+ATTAACATTACAACAGGAACAAAACCTCATATATC
+>EAS1_108:2:116:966:193
+GACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGT
+>EAS1_108:2:170:326:433
+CTTCCCTGGAGGTCTGATGGCGTTTCTCCCTCGTC
+>EAS1_108:2:170:326:433
+TTGTTGGTTTTCTGTTTCTTTGTTTGATTTGGTGG
+>EAS1_108:2:176:653:957
+AAAGGGGTGGAAAAAGATGTTCTACGCAAACAGAA
+>EAS1_108:2:176:653:957
+ACATTACAACAGGAACAAAACCTCATATATCAATA
+>EAS1_108:2:204:737:61
+AAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAGT
+>EAS1_108:2:204:737:61
+TCAGCAAGAGAAAAGCATACAGTCATCTATAAAGG
+>EAS1_108:2:240:593:842
+ATCTGGATTCTGGGAAATTCTTCATCCTGGACCCT
+>EAS1_108:2:240:593:842
+CATGGTTTAGGGGTATAATACCTCTACATGGCTGA
+>EAS1_108:2:266:994:429
+ACAGGTTTTATAAAACAATTAATTGAGACTACAGA
+>EAS1_108:2:266:994:429
+TGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAAG
+>EAS1_108:2:316:176:543
+ATGTCTATTTTTGTCTTGACACCCAACTAATATTT
+>EAS1_108:2:316:176:543
+CATCTCTTGTAATCTCTCTCCTTTTTGCTGCATCC
+>EAS1_108:2:49:271:588
+CACATGGTTTAGGGGTATAATACCTCTACATGGCT
+>EAS1_108:2:49:271:588
+GCTGCGCGGTTTCCCATCATGAAGCACTGAACTTC
+>EAS1_108:2:62:879:264
+AATGAAAACTATATTTATGCTATTCAGTTCTAAAT
+>EAS1_108:2:62:879:264
+GGACGCTGAAGAACTTTGATGCCCTCTTCTTCCAA
+>EAS1_108:2:82:879:246
+AAGAGGGACGCTGAAGAATTTTGATGCCCTCTTCT
+>EAS1_108:2:82:879:246
+ACAACTGTGAGCCATCACAATGAACAACAGGAAGA
+>EAS1_108:2:85:580:481
+AAAAAAGTAAATAAAACACATAGCTAAAACTAAAA
+>EAS1_108:2:85:580:481
+CTAACACAAGACTACCCAGATTCATAAAACAAATA
+>EAS1_108:3:216:988:883
+AAGCCAACACAATGAACAACAGGAAGAAAAGGTCT
+>EAS1_108:3:216:988:883
+AGAGGGACGCTGAAGAACTTTGATGCCCTCTTCTT
+>EAS1_108:3:24:319:429
+GTTATGCCCTGCTAAACTAAGCATCATAAATGAAG
+>EAS1_108:3:24:319:429
+TAATAAATACACACAAAAGTACAAAACTCACAGGT
+>EAS1_108:3:75:934:439
+AAGAGACTATTGCCAGATGAACCACACATTAATAC
+>EAS1_108:3:75:934:439
+CCCATCATGAAGCACTGAACTTCCACGTCTCATCT
+>EAS1_108:3:82:356:253
+AGTTTCTGCCCCCAGCATGGTTGTACTGGGCAATA
+>EAS1_108:3:82:356:253
+GAAAGCTTTCAACGCTTCTAGCCATTTCTTTTGGC
+>EAS1_108:4:163:611:211
+TAAAGTCAACATGAAGGAAAAAAATTCTAAAATCA
+>EAS1_108:4:163:611:211
+TGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAAAAC
+>EAS1_108:4:248:753:731
+TATGCTATTCAGTTCTAAATATAGAAATTGAAACA
+>EAS1_108:4:248:753:731
+TGAAGAACTTTGATGCCCTCTTCTTCCAAAGATGA
+>EAS1_108:4:31:622:216
+ATTTAACAAAAGTAAATAAAACACATAGCTAAAAC
+>EAS1_108:4:37:604:389
+ATGTTCTACGCAAACAGAAACCAAATGAGAGAAGG
+>EAS1_108:4:37:604:389
+TCATAAAACAAATACTACTAGACCTAAGAGGGATG
+>EAS1_108:4:75:166:463
+GTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAAT
+>EAS1_108:4:75:166:463
+TAACAATAGACTAAACCAAGCAGAAGAAAGAGGTT
+>EAS1_108:4:91:521:517
+CCACACTGGTTCTCTTGAAAGCTTGGGCTGTAATG
+>EAS1_108:4:91:521:517
+CCCTGCAATTAATATAATTGTGTCCATGTACACAC
+>EAS1_108:5:115:193:231
+GAAGAGATTGGATCTAATTTTTGGACTTCTTAAAG
+>EAS1_108:5:115:193:231
+TATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAA
+>EAS1_108:5:11:555:330
+CGCTTGTACTTCTAAATCTATAAAAAAATTAAAAT
+>EAS1_108:5:11:555:330
+GGAAAACAATTTGGTAATTTCGTTTTTTTTTTTTT
+>EAS1_108:5:175:149:296
+AAAAATTAACATTACAACAGGAACAAAACCTCATA
+>EAS1_108:5:175:149:296
+AAGGGGAAATAAAGTCAAGCCTTTCCTGACAAGCA
+>EAS1_108:5:180:905:36
+CCTCCGTGTCCTCCCATCTGGCCTCGTCCACACTG
+>EAS1_108:5:180:905:36
+TACTATGTTTCTTATCTGCACATTACTACCCTGCA
+>EAS1_108:5:229:717:121
+ACTATGAAGAGACTATTGCCAGATGAACCACACAC
+>EAS1_108:5:229:717:121
+TTCTTCTGAGGGAAGAACAGCTTAGGTATCAATTT
+>EAS1_108:5:321:712:224
+AGATAAAGCACACTTTAAATCAACAACAGAAAAAT
+>EAS1_108:5:321:712:224
+ATGAGAAATTACCTAATTGGTACAATGTACAATAT
+>EAS1_108:5:89:942:84
+AAAACCTGTCAAACACGAATGTTATGCCCTGCTAA
+>EAS1_108:5:89:942:84
+TACAAAACTCACAGGTTTTATAAAACAATTAATTG
+>EAS1_108:6:159:493:275
+ACAAAACTCACAGGTTTTATAAAACAATTAATTGA
+>EAS1_108:6:159:493:275
+TGCTGTTTACAAGAAACTCATTAATAAAGACATGA
+>EAS1_108:6:165:464:123
+CATACACACACATGGTTTAGGGGTATAATACCTCT
+>EAS1_108:6:165:464:123
+GTCTCTACCACAGGGGGCTGCGCGGTTTCCCATCA
+>EAS1_108:6:222:579:961
+AGTCCTATAAGAAATGCTCAAAAGAATTGTAAAAG
+>EAS1_108:6:222:579:961
+CAGAAACCTTACAAGCCAGAAGAGATTGGATCTAA
+>EAS1_108:6:71:187:824
+AGACAGACTTCATCAAGATATGTAGTCATCAGACT
+>EAS1_108:6:71:187:824
+TCTAATTTTTGGACTTCTTAAAGAAAAAAAAACCT
+>EAS1_108:6:73:735:329
+AAAGGGATTAAATTCCCCCACTTAAGAGATATAGA
+>EAS1_108:6:73:735:329
+TAGCTATACTTATATCAGATAAAGCACACTTTAAA
+>EAS1_108:6:77:48:860
+CTCTGTCTTGATTTACTTGTTGTTTGTTTTCTGTT
+>EAS1_108:6:77:48:860
+TAGATGAGAGAGAACTTCCCTGGAGGTCTGATGGC
+>EAS1_108:6:94:294:387
+ACCATCATAAATACACACAAAAGTACAAAACTCAC
+>EAS1_108:6:94:294:387
+GAATGTTATGCCCTGCTAAACTAAGCATCATAAAT
+>EAS1_108:6:95:235:746
+CTAAGCATCATAAATGAAGGGGAAATAAAGTCAAG
+>EAS1_108:6:95:235:746
+TCTAAAATCAGCAAGAGAAAAGCATACAGACATCT
+>EAS1_108:7:108:440:208
+CCCATCCTACTAAATACATATGCACCTAACACAAG
+>EAS1_108:7:108:440:208
+TTCAGGTAAAGGGGAGGAAAAAGATGTTCTACGCA
+>EAS1_108:7:222:538:267
+ATAATTGTGTCCATGTACACACGCTGTCCTATTTA
+>EAS1_108:7:222:538:267
+TCTGGCCTCGTCCACACTGGTTCTCTTGAAAGCTT
+>EAS1_108:7:266:556:252
+CCCATCTGGCCTCGTCCACACTGGTTCTCTTGAAA
+>EAS1_108:7:266:556:252
+GATTTGGTGGAAGACATAATCCCACGCTTCCTATG
+>EAS1_108:7:82:926:112
+CAATGTCAGGGAAGGAGCATTTTGTCAGTTGCCAA
+>EAS1_108:7:82:926:112
+CTGCTGGCAAGCTAGAGTCCCATTTGGAGCCCCTC
+>EAS1_108:8:118:440:850
+AATTGAAAAATTCATTTAAGAAATTACAAAATATA
+>EAS1_108:8:129:477:427
+ATGGACCTGTGATATCTGGATTCTGGGAAATTCTT
+>EAS1_108:8:129:477:427
+TACACACACATGGTTTAGGGGTATAATACCTCTAC
+>EAS1_108:8:19:929:765
+AAAAACATGAACTAACTATATGCTGTTTACAAGAA
+>EAS1_108:8:19:929:765
+ATCAACAACAGTAAAATAAAACAAAGGAGGTCATC
+>EAS1_93:1:131:946:353
+TCTCTTGAAAGCTTGGGCTGTAATGATGCCCCTTG
+>EAS1_93:1:131:946:353
+TGTACACACGCTGTCCTATGTACTTATCATGACTC
+>EAS1_93:1:179:629:513
+GTCAACATGAAGGAAAAAAATTCTAAAATCAGCAA
+>EAS1_93:1:179:629:513
+GTCACAGGTATTCCTGAGGAAAAAGAAAAAGTGAG
+>EAS1_93:1:20:635:509
+CAGCTTAGGTATCAATTTGGTGTTCTGTGTAAAGT
+>EAS1_93:1:20:635:509
+TGCATCCCTGTCTTCCTCTGTCTTGATTTACTTGT
+>EAS1_93:1:214:784:690
+AAATCCCATCAGAATAACAATGGGCTTCTCAGCGG
+>EAS1_93:1:214:784:690
+GATAATTCATCATCACTAAACCAGTCCTATAAGAA
+>EAS1_93:1:216:381:608
+TAATTGTGTCCATGTACACTCGCTGTCCTATGTAC
+>EAS1_93:1:216:381:608
+TTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTG
+>EAS1_93:1:253:59:242
+CCCCATCTCTTGTAATCTCTCTCCTTTTTGCTGCA
+>EAS1_93:1:253:59:242
+TTTGTCTTGACACCCAACTAATATTTGTCTGAGCA
+>EAS1_93:1:264:988:663
+CGAGGGGAACAGGGAGGTGCACTAATGCGCTCCAC
+>EAS1_93:1:264:988:663
+TGGCTGATTATGAAAACAATGTTCCCCAGATACCA
+>EAS1_93:1:92:213:217
+ACAGTCTAGATGAGAGAGAACTTCCCTGGAGGTCT
+>EAS1_93:1:92:213:217
+TGCATCCCTGTCTTCCTCTGTCTTGATTTACTTGT
+>EAS1_93:2:173:995:93
+GCTGGAGGGAAGAGGGACGCTGAAGAACTTTGATG
+>EAS1_93:2:173:995:93
+TAATGAAAACTATATTTATGCTATTCAGTTCTAAA
+>EAS1_93:2:286:923:549
+TCAAATGAACTTCTGTAATTGAAAAATTCATTTAA
+>EAS1_93:2:30:466:652
+AAAAATGAACAGAGCTTTCAAGAAGTATGAGATTA
+>EAS1_93:2:30:466:652
+AAGAGGCTAAAAGAATGCCAGGAAGATACATTGCA
+>EAS1_93:2:313:711:530
+ACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTT
+>EAS1_93:2:313:711:530
+TAATTGAGACTACAGAGCAACTAGGTAAAAAATTA
+>EAS1_93:3:181:93:694
+ACTCATTAATAAAGACATGAGTTCAGGTAAAGGGG
+>EAS1_93:3:181:93:694
+TTAATTGAGACTACAGAGCAACTAGGTAAAAAATT
+>EAS1_93:3:79:879:15
+AGACTACAGAGCAACTAGGTAAAAAATTAACATTA
+>EAS1_93:3:79:879:15
+AGTCAAGTCTTTCCTGACAAGCAAATGCTAAGATA
+>EAS1_93:4:160:896:275
+AAAACTATATTTATGCTATTCAGTTCTAAATATAG
+>EAS1_93:4:160:896:275
+AGGGAAGAGGGACGCTGAAGAACTTTGATGCCCTC
+>EAS1_93:4:321:271:138
+GTGTAAAGTCTCAGGGAGCCGTCCGTGTCCTCCCA
+>EAS1_93:4:321:271:138
+TTTGGTGGAAGACATAATCCCACGCTTCCTATGGA
+>EAS1_93:4:325:352:67
+ATAAAGTCAAGTCTTTCCTGACAAGCAAATGCTAA
+>EAS1_93:4:325:352:67
+TACAGAGCAACTAGGTAAAAAATTAACATTACAAC
+>EAS1_93:5:197:52:58
+AAATGAACAGAGCTTTCAAGAAGTATGAGATTATG
+>EAS1_93:5:197:52:58
+TCTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGA
+>EAS1_93:5:246:177:525
+CATCATAAATACACACAAAAGTAAAAAACTCACAG
+>EAS1_93:5:246:177:525
+TCTTAAAGAAAAAAAAACCTGTCAAACACGAATGT
+>EAS1_93:5:256:444:399
+CTGCTACTCAATATATCCATGTAACAAATCTGCGC
+>EAS1_93:5:256:444:399
+GAAGATATAACCATCCTACTAAATACATATGCACC
+>EAS1_93:5:292:122:666
+GGGAAGAACAGCTTAGGTATCAATTTGGTGTTCTG
+>EAS1_93:5:292:122:666
+TACTGTCATAACTATGAAGAGACTATTGTCAGATG
+>EAS1_93:5:62:969:12
+TATAAAGGAAATCCCATCAGAATAACAATGGGCTT
+>EAS1_93:5:62:969:12
+TCTTGCTAGAGATTTAGACATCTAAATGAAAGAGG
+>EAS1_93:5:66:372:343
+ATTACAAAATATAGTTGAAAGATCTAACAATAGAC
+>EAS1_93:5:66:372:343
+TATTCCTGAGGAAAAAGAAAAAGTGAGAAGTTTGG
+>EAS1_93:6:132:717:233
+AGAGAACTTCCCTGGAGGTCTGATGGAGTTTCTCC
+>EAS1_93:6:132:717:233
+TGTTGGTTTTCTGTTTCTTTGTTTGATTTGGTGGA
+>EAS1_93:6:159:273:253
+TTAGTCTTGCTAGAGATTTAGACATCTAAATGAAA
+>EAS1_93:6:191:948:257
+AACTAATATTTGTCTGAGCAAAACAGTCTAGATGA
+>EAS1_93:6:191:948:257
+CTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGAT
+>EAS1_93:6:216:47:302
+AAAAAAAAACCTGTCAAACACGAATGTTATGCCCT
+>EAS1_93:6:216:47:302
+AATACTCACCATCATAAATACACACAAAAGTACAA
+>EAS1_93:6:218:144:794
+GGGTGCATTGCTATGTTGCGGTCGCTTTGCCTCCT
+>EAS1_93:6:218:144:794
+TTTTTTTTTTTTTCTTTTCTCTTTTTTTTTTTTTT
+>EAS1_93:6:238:514:194
+AAACTATTTGAGGAAGTAATTGGGGAAAACCTCTT
+>EAS1_93:6:238:514:194
+AATAGACTAAACCAAGCAGAAGAAAGAGGTTCAGA
+>EAS1_93:6:255:441:47
+AACAACAGTAAAATAAAACAAAGGAGGTCATCATA
+>EAS1_93:6:255:441:47
+TGGTACAATGTACAATATTCTGATGATGGTTACAC
+>EAS1_93:6:271:244:568
+ATGGACCTGTGATATCTGGATTCTGGGAAATTCTT
+>EAS1_93:6:271:244:568
+CTTGCAACAACCTTGAGAACCCCAGGGAATTTGTC
+>EAS1_93:6:45:601:439
+AATGCTCAAAAGAATTGTAAAAGTCAAAATTAAAG
+>EAS1_93:6:45:601:439
+ATAAAAAGGGATTAAATTCCCCCACTTAAGAGATA
+>EAS1_93:7:14:426:613
+AGGGAGGGAGGGAAGAGGGACGCTGAAGAACTTTG
+>EAS1_93:7:14:426:613
+GTAATGAAAACTATATTTATGCTATTCAGTTCTAA
+>EAS1_93:7:252:171:323
+GCTATTCAGTTCTAAATATAGAAATTGAAACAGCT
+>EAS1_93:7:270:995:918
+AAAATTCATTTAAGAAATTACAAAATATAGTTGAA
+>EAS1_93:7:319:280:57
+AAAGGAAATCCCATCAGAATAACAATGGGCTTCTC
+>EAS1_93:7:319:280:57
+TGCTAGAGATTTAGACATCTAAATGAAAGAGGCTC
+>EAS1_93:8:13:325:483
+AAAAACATGAACTAACTATATGCTGTTTACAAGAA
+>EAS1_93:8:13:325:483
+ATCATACAATGATAAAAAGATCAATTCAGCAAGAA
+>EAS1_93:8:14:601:624
+AAAAGGTGATGTGTGTTCTCATCAACCTCATACAC
+>EAS1_93:8:14:601:624
+CTGCGCTCTCATTCACTCCAGCTCCCTGTCAACCC
+>EAS1_95:1:16:823:343
+AGACATAACCCCACGCTTCCTATGGAAAGGTTGTT
+>EAS1_95:1:16:823:343
+TCCCATCTGGCCTCGTCCACACTGGTTCTCTTGAA
+>EAS1_95:1:196:533:921
+AAATGTGTTTATTACCAGAGGGATGGAGGGAAGAG
+>EAS1_95:1:196:533:921
+CCTGAGAGATTCTGCAGCCCAGATCCAGATTGCTT
+>EAS1_95:1:202:341:984
+GGAGGTCATCATACAATGATAAAAAGATCAATTCA
+>EAS1_95:1:202:341:984
+TGAACTAACTATATGCTGTTTACAAGAAACTCATT
+>EAS1_95:1:249:986:224
+CTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGAT
+>EAS1_95:1:249:986:224
+TATTTGTCTGAGCAAAACAGTCTAGATGAGAGAGA
+>EAS1_95:1:261:504:780
+TCCCTGTCTTCCTCTGTCTTGATTTACTTGTTGTT
+>EAS1_95:1:261:504:780
+TTTGTCTGAGAAAAACAGTCTAGATGAGAGAGAAC
+>EAS1_95:1:301:54:240
+AACAGGGAGGTGCACTAATGCGCTCCACGCCCAAG
+>EAS1_95:1:301:54:240
+CCCTGCAATTAATATAATTGTGTCCATGTACACAC
+>EAS1_95:1:77:589:741
+AGTAAAATAAAACAAAGGAGGTCATCATACAATGA
+>EAS1_95:1:77:589:741
+TGGTTACACTAAAAGCCCATACTTTACTGCTACTC
+>EAS1_95:2:142:353:398
+CAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAA
+>EAS1_95:2:142:353:398
+GGTAAAAAATTAACATTACAACAGGAACAAAACCT
+>EAS1_95:2:162:503:769
+AGAGGGACGCTGAAGAACTTTGATGCCCTCTTCTT
+>EAS1_95:2:162:503:769
+AGGCTGCAACTGTGAGCCATCACAATGAACAACAG
+>EAS1_95:2:198:691:595
+ACACACATGGTTTAGGGGTATAATACCTCTACATG
+>EAS1_95:2:198:691:595
+CATGAAGCACTGAACTTCCACGTCTCATCTAGGGG
+>EAS1_95:2:211:954:174
+AAAGAAAAAACAAAAACTATGCTAAGTATTGGTAA
+>EAS1_95:2:211:954:174
+AGACCTAAGAGGGATGAGAAATTACCTAATTGGTA
+>EAS1_95:2:228:915:631
+AAGATATGTAGTCATCAGACTATCTAAAGTCAACA
+>EAS1_95:2:228:915:631
+ATTATTAAAGAAAAAAAAACCTGTCAAACACGAAT
+>EAS1_95:2:278:918:892
+AGCCCAGCTCCAGATTGCTTGTGGTCTGACAGGCT
+>EAS1_95:2:278:918:892
+CCATCCCTGTCTTACTTCCAGCTCCCCAGAGGGAA
+>EAS1_95:2:40:918:950
+AATATAATTGTGTCCATGTACACACGCTTTCCTTT
+>EAS1_95:2:40:918:950
+GTTCTCTTGAAAGCTTGGGCTGTAATGATGCCCCT
+>EAS1_95:3:268:523:511
+ACACTGGTTCTCTTGAAAGCTTGGGCTGTAATGAT
+>EAS1_95:3:268:523:511
+TGTCCATGTACACACGCTGTCCTATGTACTTATCA
+>EAS1_95:3:303:970:243
+AACCTGTCAAACACGAATGTTATGCCCTGCTAAAC
+>EAS1_95:3:303:970:243
+CAGACTTCATCAAGATATGTAGTCATCAGACTATC
+>EAS1_95:3:308:956:873
+ATAAAAATAAGTGTGTCCATGTACACACGCTGTCC
+>EAS1_95:3:308:956:873
+CTCATCTAGGGGAACAGGGAGGTGCACTAATGCGC
+>EAS1_95:4:174:157:573
+CAGAAACCAAATGAGAGAAGGAGTAGCTATACTTA
+>EAS1_95:4:174:157:573
+TAAAACAAATACTACTAGACCTAAGAGGGATGAGA
+>EAS1_95:4:176:971:874
+TAAAATCAGAAGAGAAAAGCATACAGTCATCTATA
+>EAS1_95:4:176:971:874
+TGAGGAAGTAATTGGGGAAAACCTCTTTAGTCTTG
+>EAS1_95:4:184:17:636
+TACACACACATGGTTTAGGGGTATAATACCTCTAC
+>EAS1_95:4:184:17:636
+TTTTTTTTTTTTTTTTTTTTTTTTTTTTCACAGGT
+>EAS1_95:4:224:592:744
+GATATGTAGTCATCAGACTATCTAAAGTCAACATG
+>EAS1_95:4:224:592:744
+TATGAGATTATGTAAAGTAACTGAACCTATGAGTC
+>EAS1_95:4:238:124:196
+TTTTTTTTCTTTTCTCTTTTTTTTTTTTTTTTTTT
+>EAS1_95:4:61:631:567
+AAGAAGATATAACCATCCTACTAAATACATATGCA
+>EAS1_95:4:61:631:567
+CATTAATAAAGACATGAGTTCAGGTAAAGGGGTGG
+>EAS1_95:4:66:179:118
+CCATGTACACACGCTGTCCTATGTACTTATCATGA
+>EAS1_95:4:66:179:118
+TGGGCTGTAATGATGCCCCTTGGCCATCACCCAGT
+>EAS1_95:4:71:517:742
+AACTATTTGAGGAAGTAATTGGGGAAAACCTCTTT
+>EAS1_95:4:71:517:742
+AAGCAGAAGAAAGAGGTTCAGAACTTGAAGACAAG
+>EAS1_95:5:257:654:116
+TAACAATAGACTAAACCAAGCAGAAGAAAGAGGTT
+>EAS1_95:5:257:654:116
+TCCTGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAA
+>EAS1_95:5:263:511:936
+CAAATAGGTAAAAAATTAACATTACAACAGGAACA
+>EAS1_95:5:263:511:936
+CAAGTCTTTCCTGACAAGCAAATGCTAAGATAATT
+>EAS1_95:5:284:212:932
+CTTTAAATCAACAACAATAAAAAAAAACAAAGGAG
+>EAS1_95:5:284:212:932
+TGATGATGGTTACGCTAAAAGTCCATGCTTTACTG
+>EAS1_95:6:174:650:125
+AAGCATCATAAATGAAGGGGAAATAAAGTCAAGTC
+>EAS1_95:6:174:650:125
+CAGGTTTTATAAAACAATTAATTGAGACTACAGAG
+>EAS1_95:6:185:312:167
+CTGAAGAACTTTGATGCCCTCTTCTTCCAAAGATG
+>EAS1_95:6:185:312:167
+TGGTCTGACAGGCTGCAACTGTGAGCCATCACAAT
+>EAS1_95:6:194:696:490
+ACATTAATACTATGTTTCTTATCTGCACATTACTA
+>EAS1_95:6:194:696:490
+TTCCACGTCTCATCTAGGGGAACAGGGAGGTGCAC
+>EAS1_95:6:53:156:845
+ACACCCAACTAATATTTGTCTGAGCAAAACAGTCT
+>EAS1_95:6:53:156:845
+TTGTAATCTCTCTCCTTTTTGCTGCATCCCTGTCT
+>EAS1_95:6:87:734:888
+ATACCATCCCTGTCTTACTTCCAGCTCCCCAGAGG
+>EAS1_95:6:87:734:888
+TGCGCTCCACGCCCAAGCCCTTCTCACAGTTTCTG
+>EAS1_95:7:155:530:532
+AGCAAGAAGATATAACCATCCTACTAAATACATAT
+>EAS1_95:7:155:530:532
+TGCGCTTGTACTTCTAAATCTATAACAAAATTAAA
+>EAS1_95:7:280:607:113
+ATCCATGTAACAAATCTGCGCTTGTACTTCTAAAT
+>EAS1_95:7:280:607:113
+GGAGTATAAATTGTTTTCCACTTTGGAAAACAATT
+>EAS1_95:7:282:817:710
+TGGAGGGAAGAGGGACGCTGAAGAACTTTGATGCC
+>EAS1_95:7:282:817:710
+TTTGTAATGAAAACTATATTTATGCTATTCAGTTC
+>EAS1_95:7:310:800:761
+AAAGCACACTTTAAATCAACAACAGTAAAATAAAA
+>EAS1_95:7:310:800:761
+CAATATTCTGATGATGGTTACACTAAAAGCCCATA
+>EAS1_95:7:46:522:426
+AAAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGA
+>EAS1_95:7:46:522:426
+GGTAAAAAATTAACATTACAACAGGAACAAAACCT
+>EAS1_95:7:55:506:125
+CTTTACTGTCATAACTATGAAGAGACTACTGCCAG
+>EAS1_95:7:55:506:125
+TCTACCACAGGGGGCTGCGCGGTTTCCCATCATGA
+>EAS1_95:7:61:702:720
+ATTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAA
+>EAS1_95:7:61:702:720
+CTCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTC
+>EAS1_95:7:74:866:49
+CATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTC
+>EAS1_95:7:74:866:49
+CCAACCTACTAAATACATATGCACCTAACACAAGA
+>EAS1_97:2:128:629:484
+AAAAAAGTAAATAAAACACATAGCTAAAACTAAAA
+>EAS1_97:2:128:629:484
+GATTCATAAAACAAATACTACTAGACCTAAGAGGG
+>EAS1_97:2:193:420:78
+ATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTG
+>EAS1_97:2:193:420:78
+TTGAGACTACAGAGCAACTAGGTAAAAAATTAACA
+>EAS1_97:2:59:882:980
+AATATAATTGTGTCCATGTACACACGCTGTCCTAT
+>EAS1_97:2:59:882:980
+GGGCTGTAATGATGCCCCTTGGCCATCACCCGGTC
+>EAS1_97:2:96:419:327
+TACTAAATACATATGCACCTAACACAAGACTACCC
+>EAS1_97:2:96:419:327
+TCTAAATCTATAACAAAATTAAAATTTAACAAAAG
+>EAS1_97:2:9:203:653
+CACCCAACTAATATTTGTCTGAGCAAAACAGTCTA
+>EAS1_97:2:9:203:653
+TCCCTGCCCCATCTCTTGTAATCTCTCTCCTTTTT
+>EAS1_97:3:147:423:584
+GAAGTAATTGGGGAAAACCTCTTTAGTCTTGCTAG
+>EAS1_97:3:147:423:584
+GCATACAGTCATCTATAAAGGAAATCCCATCAGAA
+>EAS1_97:3:160:173:889
+TATGAGTCACAGGTATTCCTGAGGAAAAAGAAAAA
+>EAS1_97:3:160:173:889
+TCAAGATATGTAGTCATCAGACTATCTAAAGTCAA
+>EAS1_97:3:277:144:848
+TAACAATAGACTAAACCAAGCAGAAGAAAGAGGTT
+>EAS1_97:3:277:144:848
+TATTCCTGAGGAAAAAGAAAAAGTGAGAAGTTTGG
+>EAS1_97:3:73:292:429
+GAAAGCTTTCAACGCTTCTAGCCATTTCTTTTTGC
+>EAS1_97:3:73:292:429
+TTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTG
+>EAS1_97:4:261:267:597
+GGGTAATAAAGTCAAGTCTTTCCTGACAAGCAAAT
+>EAS1_97:4:261:267:597
+TTGAGAATAAAGAGCAACTAGGTAAAAAATTAACA
+>EAS1_97:4:274:287:423
+CTAAATCTATAAAAAAATTAAAATTTAACAAAAGT
+>EAS1_97:4:274:287:423
+TTTTTTTTTTTTTTTTTCTCTTTTTTTTTTTTTTT
+>EAS1_97:4:290:121:79
+ATTGGTAAAGATGTGGGGAAAAAAGTAAACTCTCA
+>EAS1_97:4:290:121:79
+TGATGATGGTTACACTAAAAGCCCATACTTTACTG
+>EAS1_97:4:77:29:126
+ACAGGGAATTTGTCAATGTCAGGGAAGGAGCATTT
+>EAS1_97:4:77:29:126
+GCACAGGGGCTTAACCTCTGGTGACTGCCAGAGCT
+>EAS1_97:4:83:731:540
+CTAGGTAAAAAATTAACATTACAACAGGAACAAAA
+>EAS1_97:4:83:731:540
+TAAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAG
+>EAS1_97:5:154:952:558
+AAAGACTACCCAGATTCATAAAACAAATACTACTA
+>EAS1_97:5:154:952:558
+GCAAACAGAAACCAAATGAGAGAAGGAGTAGCTAT
+>EAS1_97:5:219:174:684
+AAAAAAACTGTCAAACACGAATGTTATGCCCTGCT
+>EAS1_97:5:219:174:684
+AGTCATCAGACTATCTAAAGTCAACATGAAGGAAA
+>EAS1_97:5:28:538:148
+AAAAAAAAACCTGTCAAACACGAATGTTATGCCCT
+>EAS1_97:5:28:538:148
+TCATCAAGATATGTAGTCATCAGACTATCTAAAGT
+>EAS1_97:5:318:177:383
+TACTACCCTGCAATTAATATAATTGTGTCCATGTA
+>EAS1_97:5:318:177:383
+TCTTGAAAGCTTGGGCTGTAATGATGCCCCTTGGC
+>EAS1_97:5:84:927:843
+CCAGCATGGTTGTACTGGGCAATACATGAGATTAT
+>EAS1_97:5:84:927:843
+TTACGTCCTATCTTCTTCTTAGGGAAGAACAGCTT
+>EAS1_97:6:222:305:337
+TTTTTTTTTTTTTTTTTTCCCTTTTTTTTTTTTTT
+>EAS1_97:6:308:667:658
+AAAGATCACTTCAGCAATAAGATATAACCATCCTA
+>EAS1_97:6:308:667:658
+TAAAAACATGAACTAACTATATCCTTCTTACAATA
+>EAS1_97:6:93:334:858
+CTGCCCCCAGCATGGTTGTACTTGGCAATACATGA
+>EAS1_97:6:93:334:858
+GTACTTATCATGACTCTATCCCAAATTCCCAATTA
+>EAS1_97:7:20:979:96
+GAAATTGAAACAGCTGTGTTTAGTGCCTTTGTTCA
+>EAS1_97:7:264:642:506
+AAATATAGAAATTGAAACAGCTGTGTTTATTGTAT
+>EAS1_97:7:264:642:506
+ACTTCATCCAAAGATGAAACGCGTAACTGCGCTCT
+>EAS1_97:7:28:979:519
+AAAAAGTAAACTCTCAAATATTGCTAGTGGGAGTA
+>EAS1_97:7:28:979:519
+CCCATACTTTACTGCTACTCAATATATCCATGTAA
+>EAS1_97:7:63:727:203
+AAGTAATTGGGGAAAACCTCTTTAGTCTTGCTAGA
+>EAS1_97:7:63:727:203
+AGTCTCTTATGAATTAACCCAGTCAGACAAAAATA
+>EAS1_97:7:9:648:712
+AAGAAGTATGAGATTATGTAAAGTAACTGAACCTA
+>EAS1_97:7:9:648:712
+TACATTGCAAGACAGACTTCATCAAGATATGTAGT
+>EAS1_97:8:36:927:478
+AAAAGGTCTTTCAAAAGGTGATGTGTGTTCTCATC
+>EAS1_97:8:36:927:478
+GACCCTACACGAATGCGTCTCTACCACAGGGGGCT
+>EAS1_99:1:17:595:863
+AAGCTACTCAATATATCCATGTAACAAATCTGCGC
+>EAS1_99:1:17:595:863
+ATAACCATCCTACTAAATACACATGCACCTAACTC
+>EAS1_99:1:187:715:521
+AAACCTCTTTAGTCTTGCTAGAGATTTAGACATCT
+>EAS1_99:1:187:715:521
+GCATACAGTCATCTATAAAGGAAATCCCATCAGAA
+>EAS1_99:1:34:649:318
+AAGAATAACAATGGGCTTCACAGCGGAACCCTTAC
+>EAS1_99:1:34:649:318
+ATGAAAGAGGCTCAAAGAATGCCAGGAAGATACAT
+>EAS1_99:1:86:871:319
+GGCTTCTCAGCGGAAACCTTACAAGCCAGAAGAGA
+>EAS1_99:1:86:871:319
+TGCTAAGATAATTCATCATCACTAAACCAGTCCTA
+>EAS1_99:2:152:355:962
+CAGTCATCTATAAAGGAAATCCCATCAGAATAACA
+>EAS1_99:2:152:355:962
+TATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGT
+>EAS1_99:2:162:257:203
+AAAAAGATCAATTCAGCAAGAAGATATAACCATCC
+>EAS1_99:2:162:257:203
+ATATCCATGTAACAAATCTGCGCTTGTACTTCTAA
+>EAS1_99:2:188:782:483
+CTAAAATCAGCAAGAGAAAAGCATACAGTCATCTA
+>EAS1_99:2:188:782:483
+GAAGTAATTGGGGAAAACCTCTTTAGTCTTGCTAG
+>EAS1_99:3:118:851:285
+CCCAATTACGTCCTATCTTCTTCTTAGGGAAGAAC
+>EAS1_99:3:118:851:285
+TGGGCAATACATGAGATTATTAGGAAATGCTTTAC
+>EAS1_99:3:135:543:760
+ATTTGCCTTCAGACCCTACACGAATGCGTCTCTAC
+>EAS1_99:3:135:543:760
+TTCAAAAGGTGATGTGTGTTCTCATCAACCTCATA
+>EAS1_99:3:187:791:153
+AATACATGAGATTATTAGGAAATGCTTTACTGTCA
+>EAS1_99:3:187:791:153
+TACACGAATGCGTCTCTACCACAGGGGGCTGCGCG
+>EAS1_99:3:21:423:169
+ACTAAAAGCCCATACTTTACTGCTACTCAATATAT
+>EAS1_99:3:21:423:169
+GGAGTATAAATTGTTTTCCACTTTGGAAAACAATT
+>EAS1_99:3:61:183:767
+GTAAAGTCTCAGGGAGCCGTCCGTGTCCTCCCATC
+>EAS1_99:3:61:183:767
+TTGCCAGATGAACCACACCTTAATACTATGTTTCT
+>EAS1_99:5:147:479:41
+CTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTT
+>EAS1_99:5:147:479:41
+TACGTCCTATCTTCTTCTTAGGGAAGAACAGCTTA
+>EAS1_99:5:191:885:623
+TGCAAGACAGACTTCATCAAGATATGTAGTCATCA
+>EAS1_99:5:191:885:623
+TTAAAGAAAAAAAAACCTGTCAAACACGAATGTTA
+>EAS1_99:6:135:354:66
+GATACATTGCAAGACAGACTTCATCAAGATATGTA
+>EAS1_99:6:135:354:66
+TATGAGATTATGTAAAGTAACTGAACCTATGAGTC
+>EAS1_99:6:177:562:806
+ACAGTGTAGATGAGAGAGACCTTCCCTGGAGGTCT
+>EAS1_99:6:177:562:806
+TGATTTACTTGTTGTTGGTTTTCTGTTTCTTTTTT
+>EAS1_99:6:181:392:500
+GGAAATCCCATCAGAATAACAATGGGCTTCTCAGC
+>EAS1_99:6:181:392:500
+TTAGTCTTGCTAGAGATTTAGACATCTAAATGAAA
+>EAS1_99:6:63:48:631
+CAATACATGAGATTATTAGGAAATGCTTTACTGTC
+>EAS1_99:6:63:48:631
+TCCTATCTTCTTCTTAGGGAAGAACAGCTTAGGTA
+>EAS1_99:7:126:361:250
+AAAAAATTAACATTACAACAGGAACAAAACCTCAT
+>EAS1_99:7:126:361:250
+TCTACGCAAACAGAAACCAAATGAGAGAAGGAGTA
+>EAS1_99:7:171:196:287
+ATAACAATGGGCTTCTCAGCAGAAACCTTACAAGC
+>EAS1_99:7:171:196:287
+ATAATTCATCATCACTAAACCAGTCCTATAAGAAA
+>EAS1_99:7:183:645:699
+GTGGCCCTCCCCCATTCCCTGCCCCATCTCTTGTA
+>EAS1_99:7:183:645:699
+TATCCCAAATTCCCAATTACGTCCTATCTTCTTCT
+>EAS1_99:7:37:400:627
+ACATGAGATTATTAGGAAATGCTTTACTGTCATAA
+>EAS1_99:7:37:400:627
+TCTTAGGGAAGAACAGCTTAGGTATCAATTTGGTG
+>EAS1_99:8:117:578:853
+AATGAACTTCTGTAATTGAAAAATTCATTTAAGAA
+>EAS1_99:8:152:778:228
+ATCATAAATACACACAAAAGTACAAAACTCACAGG
+>EAS1_99:8:152:778:228
+ATTTAAAAACATGAACTAACTATATGCTGTTTACA
+>EAS1_99:8:187:199:369
+TGAAAGAGGCTCAAAGAATGCCAGGAAGATACATT
+>EAS1_99:8:27:228:31
+AAATAAAACAAAGGAGGTCATCATACAATGATAAA
+>EAS1_99:8:27:228:31
+GGTTACACTAAAAGCCCATACTTTACTGCTACTCA
+>EAS1_99:8:99:756:130
+GAAGAAAAGGTCTTTCAAAAGGTGATGTGTGTTCT
+>EAS1_99:8:99:756:130
+GACCCTACACGAATGCGTCTCTACCACAGGGGGCT
+>EAS218_1:2:10:686:1024
+ACAAGAAACTCATTAATAAAGACATGAGTTCAGGT
+>EAS218_1:2:10:686:1024
+CATACAATGATAAAAAGATCAATTCAGCAAGAAGA
+>EAS218_1:2:15:1763:1143
+AAACAAATACTACTAGACCTAAGAGGGATGAGAAA
+>EAS218_1:2:15:1763:1143
+TGAGAGAAGGAGTAGCTATACTTATATCAGATAAA
+>EAS218_1:2:18:1498:1475
+CTTGGGCTGTAATGATGCCCCTTGGCCATCACCCG
+>EAS218_1:2:18:1498:1475
+GAAAGGTTGTTGGGAGATTTTTAATGATTCCTCAA
+>EAS218_1:2:19:752:816
+CGTCCGTGTCCTCCCATCTGGCCTCGTCCACACTG
+>EAS218_1:2:19:752:816
+TTTGGTGGAAGACATAATCCCACGCTTCCTATGGA
+>EAS218_1:2:26:211:481
+ACAAAATATAGTTGAAAGCTCTAACAATAGACTAA
+>EAS218_1:2:26:211:481
+CACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAA
+>EAS218_1:2:40:1291:1045
+CTAACACAAGACTACCCAGATTCATAAAACAAATA
+>EAS218_1:2:40:1291:1045
+GTAAAGGGGTGGAAAAAGATGTTCTACGCAACAAG
+>EAS218_1:2:64:1318:1711
+GGAAGAGGGACGCTGAAGAACTTTGATGCCCTCTT
+>EAS218_1:2:64:1318:1711
+TGAAAACTATATTTATGCTATTCAGTTCTAAATAT
+>EAS218_1:4:14:1872:1521
+TCAAAGAATGCCAGGAAGATACATTGCAAGTCAGA
+>EAS218_1:4:14:1872:1521
+TCATCAAAAACCTTACAAGCCAGAAGAGATTGGAT
+>EAS218_1:4:15:856:340
+CACGCTGTCCTATGTACTTATCATGACTCTATCCC
+>EAS218_1:4:15:856:340
+CCCCAGCATGGTTGCACTGGGCAATACATGAGATT
+>EAS218_1:4:28:315:310
+AAACTGTTCTCTTGAAAGCTTGGGCTGTAATGATG
+>EAS218_1:4:28:315:310
+CATGTACACACGCTGTCCTATGTACTTATCATGAC
+>EAS218_1:4:37:1626:862
+ACCCAACTAATATTTGTCTGAGCAAAACAGTCTAG
+>EAS218_1:4:37:1626:862
+TCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCT
+>EAS218_1:4:61:1369:440
+AAAGACATGATTTCAGGTAAAGGGGTGGAAAAAGA
+>EAS218_1:4:61:1369:440
+CAGGTTTTATAAAACAATTAATTGAGACTACAGAG
+>EAS218_1:4:62:561:531
+AGCTATACTTATATCAGATAAAGCACACTTTAAAT
+>EAS218_1:4:62:561:531
+TACTAGACCTAAGAGGGATGAGAAATTACCTAATT
+>EAS218_1:4:71:832:743
+ACCCAGTCCCTGCCCCATCTCTTGTAATCTCTCTC
+>EAS218_1:4:71:832:743
+CTATGTACTTATCATGACTCTATCCCAAATTCCCA
+>EAS218_1:4:73:42:1038
+AAACAGTCTAGATGAGAGAGAACTTCCCTGGAGGT
+>EAS218_1:4:73:42:1038
+TCTGTCTTGATTTACTTGTTGTTGGTTTTCTGTTT
+>EAS218_1:4:75:555:1591
+TGAACTTCTGTAATTGAAAAATTCATTTAAGAAAT
+>EAS218_1:6:49:905:27
+CCACAGGGGGCTGCGCGGTTTCCCATCCTGAAGCA
+>EAS218_1:6:49:905:27
+GAAGAGACTATTGCCAGTTGAACCACACATTAATA
+>EAS218_1:6:66:1282:1215
+GTGATGTGTGTTCTCATCAACCTCATACACACACA
+>EAS218_1:6:66:1282:1215
+TTCAGACCCTACACGAATGCGTCTCTACCACAGGG
+>EAS218_1:6:77:1529:522
+AAAACCTGTCAAACACGAATGTTATGCCCTGCTAA
+>EAS218_1:6:77:1529:522
+AAAGTTCAATACTCACCATCATAAATACACACAAA
+>EAS218_1:6:88:1413:14
+AATGAAAGAGGCTCAAAGAATGCCAGGAAGATACA
+>EAS218_1:8:13:1729:1844
+ATGAACTTCTGTAATTGAAAAATTCATTTAAGAAA
+>EAS218_1:8:16:1081:1894
+AAGGTGATGTGTGTTCTCATCAACCTCATACACAC
+>EAS218_1:8:16:1081:1894
+AGATGAAACGCGTAACTGGGCTCTCATTCACTCCA
+>EAS218_1:8:26:785:882
+CAGTTTCTGCCCCAAGCATGGTTGTACTGGGCAAT
+>EAS218_1:8:26:785:882
+TACTTATCATGACTCTATCCCAAATTCCCAATTAC
+>EAS218_1:8:61:1797:113
+CAGATAGCTTGTGGTCTGACAGGCTGCAACTGTGA
+>EAS218_1:8:61:1797:113
+GGGATGGAGGGAAGAGGGACGCTGAAGAACTTTGA
+>EAS218_1:8:70:445:1289
+CTAATTTTTGGACTTCTTAAAGAAAAAAAAACCTG
+>EAS218_1:8:70:445:1289
+GAATTGTAAAAGTCAAAATTAAAGTTCAATACTCA
+>EAS218_1:8:82:1540:77
+ATTGAGACTACAGAGCAACTAGGTAAAAAATTAAC
+>EAS218_1:8:82:1540:77
+GAAATAAAGTCAAGTCTTTCCTGACAAGCAAATGC
+>EAS218_1:8:90:706:1276
+AATTAACATTACAACAGGAACAAAACCTCATATAT
+>EAS218_1:8:90:706:1276
+GTAAAGGGGTGGAAAAAGATGTTCTACGCAAACAG
+>EAS218_4:1:34:1614:558
+CAACATGAAGGAAAAAAATTCTAAAATCAGCAAGA
+>EAS218_4:1:34:1614:558
+GTCAAACACGAATGTTATGCCCTGCTAAACTAAGC
+>EAS218_4:1:48:9:409
+CAGCTCCCTGTCACCCAATGGACCTGTGATATCTG
+>EAS218_4:1:48:9:409
+GTTTAGTGCCTTTGTTCACATAGACCCCCTTGCAA
+>EAS218_4:1:84:1505:1037
+GTCAACATGAAGGAAAAAAATTCTAAAATCAGCAA
+>EAS218_4:1:84:1505:1037
+TGCCCTGCTAAACTAAGCATCATAAATGAAGGGGA
+>EAS218_4:1:9:206:901
+AGTCTAGATGAGAGAGAACTTCCCTGGAGGTCTGA
+>EAS218_4:1:9:206:901
+CTTCCTCTGTCTTGATTTACTTGTTGTTGGTTTTC
+>EAS218_4:3:12:630:707
+ATTTAAAAACATGAACTAACTATATGCTGTTTACA
+>EAS218_4:3:12:630:707
+CACAAAAGTACAAAACTCACAGGTTTTATAAAACA
+>EAS218_4:3:39:1671:1928
+AGCCCATACTTTACTGCTACTCAATATATCCATGT
+>EAS218_4:3:39:1671:1928
+CAAATATTGCTAGTGGGAGTATAAATTGTTTTCCA
+>EAS218_4:3:41:1281:1785
+ACTATCTAAAGTCAACATGAAGGAAAAAAATTCTA
+>EAS218_4:3:41:1281:1785
+GGAAAAAGAAAAAGTGAGAAGTTTGGAAAAACTAT
+>EAS218_4:3:65:85:1547
+AAATCAGCAAGAGAAAAGCATACAGTCATCTATAA
+>EAS218_4:3:65:85:1547
+GTTTGGAAAAACTATTTGAGGAAGTAATTGGGGAA
+>EAS218_4:5:41:118:1246
+ACCAGAGGGATGGAGGGAAGAGGGACGCTGAAGAA
+>EAS218_4:5:41:118:1246
+CTCCAGATTGCTTGTGGTCTGACAGGCTGCAACTG
+>EAS218_4:5:63:875:1339
+CCCCAGATACCATCCCTGTCTTACTTCCAGCTCCC
+>EAS218_4:5:63:875:1339
+GGGAACAGGGAGGTGCACTAATGCGCTCCACGCCC
+>EAS218_4:7:71:31:1973
+AAAGAGGCTCAAAGAATGCCAGGAAGATACATTGC
+>EAS218_4:7:72:1288:1211
+ATTGGTACAATGTACAATATTCTGATGATGGTTAC
+>EAS218_4:7:72:1288:1211
+GATAAAGCACACTTTAAATCAACAACAGTAAAATA
+>EAS218_4:7:85:923:726
+ACAAAATATAGTTGAAAGCTCTAACAATAGACTAA
+>EAS218_4:7:85:923:726
+GTAAAGTAACTGAACCTATGAGTCACAGGTATTCC
+>EAS218_4:7:87:964:826
+CACAGGGGGCTGCGCGGTTTCCCATCATGAAGCAC
+>EAS218_4:7:87:964:826
+TGAAGAGACTATTGCCAGATGAACCACACATTAAT
+>EAS218_4:7:89:1487:520
+CACGAATGCGTCTCTACCACAGGGGGCTGCGCGGT
+>EAS218_4:7:89:1487:520
+TATGAAGAGACTATTGCCAGATGAACCACACATTA
+>EAS218_4:7:90:1873:89
+GAGATTCTGCAGCCCAGATCCAGATTGCTTGTGGT
+>EAS218_4:7:90:1873:89
+GCATTTTGTCAGTTACCAAATGTGTTTATTACCAG
+>EAS219_1:1:22:490:2011
+ATAACAATGGGCTTCTCAGCGGAAACCTTACAAGC
+>EAS219_1:1:22:490:2011
+GCTAGAGATTTAGACATCTAAATGAAAGAGGCTCA
+>EAS219_1:1:37:1004:1136
+CCAGGGAATTTGTCAATGTCAGGGAAGGAGCATTT
+>EAS219_1:1:37:1004:1136
+GTCACCCAATGGACCTGTGATATCTGGATTCTGGG
+>EAS219_1:1:44:1466:425
+GTCCGTGTCCTCCCATCTGGCCTCGTCCACACTGG
+>EAS219_1:1:44:1466:425
+TTATCTGCACATTACTACCCTGCAATTAATATAAT
+>EAS219_1:1:50:257:341
+AAATTAACATTACAACAGGAACAAAACCTCATATA
+>EAS219_1:1:50:257:341
+TGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCTA
+>EAS219_1:1:5:497:687
+AAGAAAAGGTCTTTCAAAAGGTGATGTGTGTTCTC
+>EAS219_1:1:5:497:687
+TTGCCTTCAGACCCTACACGAATGCGTCTCTACCA
+>EAS219_1:1:60:1420:660
+AATGCTAAGATAATTCATCATCACTAAACCAGTCC
+>EAS219_1:1:60:1420:660
+GTAAAAAATTAACATTACAACAGGAACAAAACCTC
+>EAS219_1:1:63:28:1549
+AAAAAGTAAACTCTCAAATATTGCTAGTGGGAGTA
+>EAS219_1:1:63:28:1549
+TACAATATTCTGATGATGGTTACACTAAAAGCCCA
+>EAS219_1:1:67:191:668
+ACTATGAAGAGACTATTGCCAGATGAACCACACCT
+>EAS219_1:1:67:191:668
+CCAATTACGTCCTATCTTCTTCTTAGGGAAGAACA
+>EAS219_1:3:11:706:1030
+ATCTCTTGTAATCTCTCTCATCTTTGCTGCATCCC
+>EAS219_1:3:11:706:1030
+ATGTCTATTTTTGTCTTGACACCCAACTAATATTT
+>EAS219_1:3:33:1168:1762
+AGATTGCTTGTGGTCTGACAGGCTGCAACTGTGAG
+>EAS219_1:3:33:1168:1762
+GTCTTACTTCCAGCTCCCCAGAGGGAAAGCTTTCA
+>EAS219_1:3:4:1620:413
+CATCACAATGAACAACAGGAAGAAAAGGTCTTTCA
+>EAS219_1:3:4:1620:413
+TCTAGCCATTTCTTTTGGCATTTGCCTTCAGACCC
+>EAS219_1:3:62:603:1552
+AATTTGGTGTTCTGTGTAAAGTCTCAGGGAGCCGT
+>EAS219_1:3:62:603:1552
+GAACCACACATTAATACTATGTTTCTTATCTGCAC
+>EAS219_1:3:88:465:1877
+AAAGCACACTTTAAATCAACAACAGTAAAATAAAA
+>EAS219_1:3:88:465:1877
+TAAGAGGGATGAGAAATTACCTAATTGGTACAATG
+>EAS219_1:3:90:219:528
+ACGAATGTTATGCCCTGCTAAACTAAGCATCATAA
+>EAS219_1:3:90:219:528
+GTACAAAACTCACAGGTTTTATAAAACAATTAATT
+>EAS219_1:5:5:259:250
+GTCTCTTATGAATTAACCCAGTCAGACAAAAATAA
+>EAS219_1:5:5:259:250
+TATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGT
+>EAS219_1:5:6:1067:91
+CTTGACACCCAACTAATATTTGTCTGAGCAAAACA
+>EAS219_1:5:6:1067:91
+CTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGAT
+>EAS219_1:7:16:1343:1621
+AAATTCTAAAATCAGCAAGAGAAAAGCATACAGTC
+>EAS219_1:7:16:1343:1621
+AAGTGAGAAGTTTGGAAAAACTATTTGAGGAAGTA
+>EAS219_1:7:18:571:1110
+GAGGTGCACTAATGCGCTCCACGCCCAAGCCCTTC
+>EAS219_1:7:18:571:1110
+TAATATAATTGTGTCCATGTACACACGCTGTCCTA
+>EAS219_1:7:20:1444:328
+AAAAGATGTTCTACGCAAACAGAAACCAAATGAGA
+>EAS219_1:7:20:1444:328
+TACTAAATACATATGCACCTAACACAAGACTACCC
+>EAS219_1:7:35:392:2042
+ATAAATCTATAAAAAAATTAAAATTTAACAAAAGT
+>EAS219_1:7:35:392:2042
+TAACACAAGACTACCCAGATTCATAAAACNAATAC
+>EAS219_1:7:50:1339:1154
+CATCTCTTGTAATCTCTCTCCTTTTTGCTGCATCC
+>EAS219_1:7:50:1339:1154
+GTCTTGACACCCAACTAATATTTGTCTGAGCAAAA
+>EAS219_1:7:62:1076:540
+CCCCCACTTAAGAGATATAGATTGGCAGAACAGAT
+>EAS219_1:7:62:1076:540
+TAAATCAACAACAGTAAAATAAAACAAAGGAGGTC
+>EAS219_1:7:94:1655:1921
+AAAAGCATACAGTCATCTATAAAGGAAATCCCATC
+>EAS219_1:7:94:1655:1921
+TTTGGAAAAACTATTTGAGGAAGTAATTGGGGAAA
+>EAS219_FC30151:1:18:1418:237
+CCTTGAGAACCCCAGGGAATTTGTCAATGTCAGGG
+>EAS219_FC30151:1:18:1418:237
+CTGGGAAATTCTTCATCCTGGACCCTGAGAGATTC
+>EAS219_FC30151:1:53:140:421
+AACCAAATGAGAGAAGGAGTAGCTATACTTATATC
+>EAS219_FC30151:1:53:140:421
+GATTCATAAAACAAATACTACTAGACCTAAGAGGG
+>EAS219_FC30151:1:54:436:1452
+AAGACAAGTCTCTTATGAATTAACCCAGTCAGACA
+>EAS219_FC30151:1:54:436:1452
+AGGAAGTAATTGGGGAAAACCTCTTTAGTCTTGCT
+>EAS219_FC30151:1:55:8:1412
+ATATAATTGTGTCCATGTACACACGCTGTCCTATG
+>EAS219_FC30151:1:55:8:1412
+GCCCAAGCCCTTCTCACAGTTTCTGCCCCCAGCAT
+>EAS219_FC30151:1:76:34:691
+ATATATCAATATTAACTTTGAATAAAAAGGGATTA
+>EAS219_FC30151:1:88:1454:418
+CCAGGAAGATACATTGCAAGACAGACTTCATCAAG
+>EAS219_FC30151:1:88:1454:418
+GAAGAGATTGGATCTAATTTTTGGACTTCTTAAAG
+>EAS219_FC30151:3:13:674:1717
+AGAAAAGCATGCAGTCATCTATAAAGGAAATCCCA
+>EAS219_FC30151:3:13:674:1717
+TAAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAG
+>EAS219_FC30151:3:40:1128:1940
+CCCCTTACAACAACCTTGAGAACCCCAGGGAATTT
+>EAS219_FC30151:3:40:1128:1940
+CCGAGTCACGGGGTTGCCAGCACAGGGGCTTAACC
+>EAS219_FC30151:3:55:74:1040
+CTACCACAGGGGGCTGCGCGGTTTCCCATCATGAA
+>EAS219_FC30151:3:55:74:1040
+GGAAATGCTTTACTGTCATAACTATGAAGAGACTA
+>EAS219_FC30151:3:73:1458:1337
+AAATGAAGGGGAAATAAAGTCAAGTCTTTCCTGAC
+>EAS219_FC30151:3:73:1458:1337
+AGGTAAAAAATTAACATTACAACAGGAACAAAACC
+>EAS219_FC30151:3:81:1723:1820
+ATGAGAGAGAACTTCCCTGGAGGTCTGATGGCGTT
+>EAS219_FC30151:3:81:1723:1820
+CATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTG
+>EAS219_FC30151:3:90:1906:1528
+CACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAA
+>EAS219_FC30151:3:90:1906:1528
+TTACAAAATATAGTTGAAAGCTCTAACAATAGACT
+>EAS219_FC30151:3:9:1595:1826
+ACTAATATTTGTCTGAGCAAAACAGTCTAGATGAG
+>EAS219_FC30151:3:9:1595:1826
+ATCTCTCTCCTTTTTGCTGCATCCCTGTCTTCCTC
+>EAS219_FC30151:5:29:817:854
+AGGGGCTTAACCTCTGGTGACTGCCAGAGCTGCTG
+>EAS219_FC30151:5:29:817:854
+GTTCTCAAGGTTGTTGCAAGGGGGTTTATGTGAAC
+>EAS219_FC30151:5:54:1351:910
+ACTAAAAGCCCATACTTTACTGCTACTCAATATAT
+>EAS219_FC30151:5:54:1351:910
+ACTCTCAAATATTGCTAGTGGGAGTATAAATTGTT
+>EAS219_FC30151:5:63:424:1643
+GACCCTACACGAATGCGTCTCTACCACAGGGGGCT
+>EAS219_FC30151:5:63:424:1643
+GGTCTTTCAAAAGGTGATGTGTGTTCTCATCAACC
+>EAS219_FC30151:5:6:1243:981
+ATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAA
+>EAS219_FC30151:5:6:1243:981
+TTACCTAGTTGCTCTGTAGTCTCAATTAATTGTTT
+>EAS219_FC30151:5:70:348:972
+GAGAGAACTTCCCTGGAGGTCTGATGGCGTTTCTC
+>EAS219_FC30151:5:70:348:972
+TCTTCCTCTGTCTTGATTTACTTGTTGTTGGTTTT
+>EAS219_FC30151:5:72:1426:1883
+ACATAATCCCACGCTTCCTATGGAAAGGTTGTTGG
+>EAS219_FC30151:5:72:1426:1883
+CATCTGGCCTCGTCCACACTGGTTCTCTTGAAAGC
+>EAS219_FC30151:7:11:1261:1200
+AAAAAAAACCTGTCAAACACGAATGTTATGCCCTG
+>EAS219_FC30151:7:11:1261:1200
+TTGCAAGACAGACTTCATCAAGTTATGTAGTCATC
+>EAS219_FC30151:7:51:1429:1043
+TATTTGTAATGAAAACTATATTTATGCTATTCAGT
+>EAS219_FC30151:7:87:1289:83
+ACATGAAGGAAAAAAATTCTAAAATCAGCAAGAGA
+>EAS219_FC30151:7:87:1289:83
+ATGCCCTGCTAAACTAAGCATCATAAATGAAGGGG
+>EAS219_FC30151:7:94:1440:2016
+AACCTGTCAAACACGAATGTTATGCCCTGCTAAAC
+>EAS219_FC30151:7:94:1440:2016
+CACAAAAGTACAAAACTCACAGGTTTTATAAAACA
+>EAS220_1:2:11:1274:1230
+TGAGCAAAACAGTCTAGATGAGAGAGAACTTCCCT
+>EAS220_1:2:11:1274:1230
+TTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTT
+>EAS220_1:2:43:656:1866
+TGAACTTCTGTAATTGAAAAATTCATTTAAGAAAT
+>EAS220_1:2:47:591:698
+CAAATCTGCGCTTGTACTTCTAAATCTATAACAAA
+>EAS220_1:2:47:591:698
+TCCTACTAAATACATATGCACCTAACACAAGACTA
+>EAS220_1:2:50:513:882
+AAAACAAATACTACTAGACCTAAGAGGGATGAGAA
+>EAS220_1:2:50:513:882
+GGAGTAGCTATACTTATATCAGATAAAGCACACTT
+>EAS220_1:2:52:1779:1664
+CCCTGCCCCATCTCTTGTAATCTCTCTCCTTTTTG
+>EAS220_1:2:52:1779:1664
+TGTTAAAATGTCTATTTTTGTCTTGACACCCAACT
+>EAS220_1:2:54:91:1232
+AAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAG
+>EAS220_1:2:54:91:1232
+AAAGCTCTAACAATAGACTAAACCAAGCAGAAGAA
+>EAS220_1:2:62:1109:804
+TAGAGTCCCATTTGGAGCCCCTCTAAGCCGTTCTA
+>EAS220_1:2:62:1109:804
+TGTCAGTTACCAAATGTGTTTATTACCAGAGGGAT
+>EAS220_1:2:63:267:545
+ATGAGAGAGAACTTCCCTGGAGGTCTGATGGCGTT
+>EAS220_1:2:63:267:545
+CTTGATTTACTTGTTGTTGGTTTTCTGTTTCTTTG
+>EAS220_1:2:72:1809:1398
+AATACATGAGATTATTAGGAAATGCTTTACTGTCA
+>EAS220_1:2:72:1809:1398
+CTATCTTCTTCTTAGGGAAGAACAGCTTAGGTATC
+>EAS220_1:4:100:20:1199
+AAGGGGAAATAAAGTCAAGTCTTTCCTGACAAGCA
+>EAS220_1:4:100:20:1199
+CAGTCATCTATAAAGGAAATCCCATCAGAATAACA
+>EAS220_1:4:14:1665:1772
+GGGCTGTAATGATGCCCCTTGGCCATCACCCGGTC
+>EAS220_1:4:14:1665:1772
+TATAATGGTGTCCATGTACACACGCTGTCCTATGT
+>EAS220_1:4:46:1566:668
+CTACTAAATACATATGCACCTAACACAAGACTACC
+>EAS220_1:4:46:1566:668
+TTAATAAAGACATGAGTTCAGGTAAAGGGGTGGAA
+>EAS220_1:4:69:88:1154
+ATAACTATGAAGAGACTATTGCCAGATGAACCACA
+>EAS220_1:4:69:88:1154
+CACGAATGCGTCTCTACCACAGGCGGCTGCGCGGT
+>EAS220_1:4:6:1178:1105
+GATAATTCATCATCACTAAACCAGTCCTATAAGAA
+>EAS220_1:4:6:1178:1105
+GGAACAAAACCTCATATATCAATATTAACTTTGAA
+>EAS220_1:4:70:766:2016
+AAAAAAATTCTAAAATCAGCAAGAGAAAAGCATAC
+>EAS220_1:4:70:766:2016
+ATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTG
+>EAS220_1:6:24:105:1046
+AGATTCATAAAACAAATACTACTAGACCTAAGAGG
+>EAS220_1:6:24:105:1046
+CATAGCTAAAACTAAAAAAGCAAAAACAAAAACTA
+>EAS220_1:6:7:1547:1933
+AATATTTGACTGAGCAAAACAGTCTAGATGAGAGA
+>EAS220_1:6:7:1547:1933
+CTCTTGTAATCTCTCTCCTTTTTGCTGCATCCCTG
+>EAS220_1:8:18:1757:95
+ATGAGTCGCAGGTATTCCTGAGGAAAAAGAAAAAG
+>EAS220_1:8:18:1757:95
+CTTCATCAAGATATGTAGTCATCAGACTATCTAAA
+>EAS220_1:8:33:672:473
+ATGTCAGGGAAGGAGCATTTTGTCAGTTACCAAAT
+>EAS220_1:8:33:672:473
+TCATCCTGGACCCTGAGAGATTCTGCAGCCCAGCT
+>EAS220_1:8:38:1576:1923
+CACAGGGGGCTGCGCGGTTTCCCATCATGAAGCAC
+>EAS220_1:8:38:1576:1923
+CTGTCATAACTATGAAGAGACTATTGCCAGATGAA
+>EAS220_1:8:45:178:1321
+AGGTTTTATAAAACAATTAATTGAGACTACAGAGC
+>EAS220_1:8:45:178:1321
+CATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCT
+>EAS220_1:8:46:1528:799
+ATGATAAAAAGATCAATTCAGCAAGAAGATATAAC
+>EAS220_1:8:46:1528:799
+CATGTAACAAATCTGCGCTTGTACTTCTAAATCTA
+>EAS220_1:8:46:485:482
+AGAGATTCTGCAGCCCAGATCCAGATTGCTTGTGG
+>EAS220_1:8:46:485:482
+ATTACCAGAGGGATGAAGGGAAGAGGGACGCTGAA
+>EAS220_1:8:5:996:2000
+AGTCAACATGAAGGAAAAAAATTCTAAAATCAGCA
+>EAS220_1:8:5:996:2000
+CACGAATGTTATGCCCTGCTAAACTAAGCATCATA
+>EAS220_1:8:66:1046:167
+ACAATGTACAATATTCTGATGATGGTTACACTAAA
+>EAS220_1:8:66:1046:167
+ACACTTTAAATCAACAACAGTAAAATAAAACAAAG
+>EAS220_1:8:83:1456:1854
+AAGATCAATTCAGCAAGAAGATATAACCATCCTAC
+>EAS220_1:8:83:1456:1854
+AAGCCCATACTTTACTGCTACTCAATATATCCATG
+>EAS221_1:2:23:127:880
+CGGAAACCTTACAAGCCAGAAGAGATTGGATCTAA
+>EAS221_1:2:23:127:880
+TAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAAT
+>EAS221_1:2:24:1037:84
+TGCCCTCTTCTTCCAAAGATGAAACGCGTAACTGC
+>EAS221_1:2:24:1037:84
+TTCAGTTCTAAATATAGAAATTGAAACAGCTGTGT
+>EAS221_1:2:29:1486:672
+AATTGAAACAGCTGTGTTTAGTGCCTTTGTTCACA
+>EAS221_1:2:3:542:428
+AAGACATGAGTTCAGGTACAGGGGTGGAAAAAGAT
+>EAS221_1:2:3:542:428
+AGAGCAACTAGGTAAAAAATTAACATTACAACAGG
+>EAS221_1:2:3:945:2005
+AACCAAGCAGAAGAAAGAGGCTCAGAACTTGAAGA
+>EAS221_1:2:3:945:2005
+GAAAAACTATTTGAGGAAGTAATTGGGGAAAACCT
+>EAS221_1:2:52:1144:509
+AAAAGGGATTAAATTCCCCCACTTAAGAGATATAG
+>EAS221_1:2:52:1144:509
+TGTAAAAGTCAAAATTAAAGTTCAATACTCACCAT
+>EAS221_1:2:73:955:728
+AATTCATCATCACTAAACCAGTCCTATAAGAAATG
+>EAS221_1:2:73:955:728
+TACAACAGGAACAAAACCTCATATATCAATATTAA
+>EAS221_1:2:8:327:522
+AACAGGAACAAAACCTCATATATCAATATTAACTT
+>EAS221_1:2:8:327:522
+TTCTACGCAAACAGAAACCAAATGAGAGAAGGAGT
+>EAS221_1:2:90:986:1224
+CTGTGTTTAGTGCCTTTGTTCAACCCCCTTGCAAC
+>EAS221_1:2:91:856:504
+CTAGATGAGAGAGAACTTCCCTGGAGGTCTGATGG
+>EAS221_1:2:91:856:504
+GTTGTTGGTTTTCTGTTTCTTTGTTTGATTTGGTT
+>EAS221_1:4:36:1402:1709
+AGCTTAGGTATCAATTTGGTGTTCTGTGTAAAGTC
+>EAS221_1:4:36:1402:1709
+TTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATT
+>EAS221_1:4:3:248:1491
+TTAAAATTTAACAAAAGTAAATAAAACACATAGCT
+>EAS221_1:4:41:519:609
+AACAAAAACTATGCTAAGTATTGGTAAAGATGTGG
+>EAS221_1:4:41:519:609
+TACCTAATTGGTACAATGGACAATATTCTGATGAT
+>EAS221_1:4:4:1732:88
+GCTGTAATGATGCCCCTTGGCCATCACCCGGTCCC
+>EAS221_1:4:4:1732:88
+TGTACACACGCTGTCCTATGTACTTATCATGACTC
+>EAS221_1:4:68:64:783
+AAGACATAATCCCACGCTTCCTATGGAAAGGTTGT
+>EAS221_1:4:68:64:783
+TCCGTGTCCTCCCATCTGGCCTCGTCCACACTGGT
+>EAS221_1:4:87:1375:1303
+AGGAGCATTTTGTCAGTTACCAAATGTGTTTATTA
+>EAS221_1:4:87:1375:1303
+GAGAGATTCTGCAGCCCAGATCCAGATTGCTTGTG
+>EAS221_1:6:38:1071:155
+ATGCCCCTTGGCCATCACCCAGTCCCTGCCCCATC
+>EAS221_1:6:38:1071:155
+TAAAATGTCTATTTTTGTCTTGACACCCAACTAAT
+>EAS221_1:6:4:1131:104
+ACTTTGGAAAACAATTTGGTAATTTCGTTTTTTTT
+>EAS221_1:6:4:1131:104
+ATGTAACAAATCTGCGCTTGTACTTCTAAATCTAT
+>EAS221_1:6:57:1342:1166
+AAAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGA
+>EAS221_1:6:57:1342:1166
+CAGAGCAACTAGGTAAAAAATTAACATTACAACAG
+>EAS221_1:6:60:1037:1146
+AAAAGCATACAGTCATCTATAAAGGAAATCCCATC
+>EAS221_1:6:60:1037:1146
+GTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAAT
+>EAS221_1:6:69:735:1915
+AATTTGTCAATGTCAGGGAAGGAGCATTTTGTCAG
+>EAS221_1:6:69:735:1915
+ACTGCCAGAGCTGCTGGCAAGCTAGAGTCCCATTT
+>EAS221_1:6:89:1164:573
+AAAAAACCTGTCAAACACGAATGTTATGCCCTGCT
+>EAS221_1:6:89:1164:573
+AGACTTCATCAAGAGATGTAGTCATCAGACTATCT
+>EAS221_1:6:92:1807:1185
+AGCATGGTTGTACTGGGCAATACATGAGATTATTA
+>EAS221_1:6:92:1807:1185
+CTCTATCCCAAATTCCCAATTACGTCCTATCTTCT
+>EAS221_1:6:96:491:1891
+AGAAGTTTGGAAAAACTATTTGAGGAAGTAATTGG
+>EAS221_1:6:96:491:1891
+GTCAACATGAAGGAAAAAAATTCTAAAATCAGCAA
+>EAS221_1:8:15:881:1932
+CACTTAAGAGATATAGATTGGCAGAACAGATTTAA
+>EAS221_1:8:15:881:1932
+CACTTTAAATCAACAACAGTAAAATAAAACAAAGG
+>EAS221_1:8:4:679:110
+AATGTTCCCCAGATACCATCCCTGTCTTACTTCCA
+>EAS221_1:8:4:679:110
+TCCAGATTGCTTGTGGTCTGACAGGCTGCAACTGT
+>EAS221_1:8:58:369:244
+CTGTAATGATGCCCCTTGGCCATCACCCAGTCCCT
+>EAS221_1:8:58:369:244
+TTGGGAGATTTTTAATGATTCCTCAATGTTAAAAT
+>EAS221_1:8:60:1020:1259
+CTATGAAGAGACTATTGCCAGATGAACCACACATT
+>EAS221_1:8:60:1020:1259
+TAGGGAAGAACAGCTTAGGTATCAATTTGGTGTTC
+>EAS221_1:8:65:1928:1125
+CATCACAATGAACAACAGGAAGAAAAGGTCTTTCA
+>EAS221_1:8:65:1928:1125
+GGCATTTGCCTTCAGACCCTACACGAATACGTCTC
+>EAS221_1:8:67:1797:1931
+GGGAAAGCTTTCAACGCTTCTAGCCATTTCTTTTG
+>EAS221_1:8:67:1797:1931
+TGGTCTGACAGGCTGCAACTGTGAGCCATCACAAT
+>EAS221_1:8:70:1349:1788
+ATTATATCAGATAAAGCACACTTTAAATCAACAAC
+>EAS221_1:8:70:1349:1788
+TACTAGACCTAAGAGGGATGAGAAATTACCTAATT
+>EAS221_1:8:73:108:1621
+GAACTTCCCTGGAGGTCTGATGGCGTTTCTCCCTC
+>EAS221_1:8:73:108:1621
+GTTTTCTGTTTCTTTGTTTGATTTGGTGGAAGACA
+>EAS221_1:8:77:781:676
+TCATGAAGCACTGAACTTCCACGTATCATCTAGGG
+>EAS221_1:8:77:781:676
+TTGCCAGATGAACCACACATTAATACTATGTTTCT
+>EAS221_1:8:78:1478:1446
+GGAAGAGGGACGCTGAAGAACTTTGATGCCCTCTT
+>EAS221_1:8:78:1478:1446
+TGTGGTCTGACAGGCTGCAACTGTGAGCCATCACA
+>EAS221_1:8:84:1013:1074
+GCAAGGGGGTCTATGTGAACAAAGGCACTAAACAC
+>EAS221_1:8:84:1013:1074
+TTGATGCCCTCTTCTTCCAAAGATGAAACGCGTAA
+>EAS221_1:8:8:1351:1986
+CCCAAGCCCTTCTCACAGTTTCTGCCCCCAGCATG
+>EAS221_1:8:8:1351:1986
+TCTTACTTCCAGATCCCCAGAGGGAAAGCTTTCAA
+>EAS221_3:2:100:1147:124
+AAAAAAGAATTTTAAAAATGAACAGAGCTTTCAAG
+>EAS221_3:2:100:1147:124
+AATGCCAGGAAGATACATTGCAAGACAGACTTCAT
+>EAS221_3:2:22:1623:709
+GAAGACAAGTCTCTTATGAATTAACCCAGTCAGAC
+>EAS221_3:2:22:1623:709
+GGGAAAACCTCTTTAGTCTTGCTAGAGATTTAGAC
+>EAS221_3:2:2:491:1886
+CTCTTTAGTCTTGCTAGAGATTTAGACATCTAAAT
+>EAS221_3:2:59:1576:946
+AACTGTGAGCCATCACAATGAACAACAGGAAGAAA
+>EAS221_3:2:59:1576:946
+CAACGCTTCTAGCCATTTCTTTTGGCATTTGCCTT
+>EAS221_3:2:60:590:1760
+AAAGTAACTGAACCTATGAGTCACAGGTATTCCTG
+>EAS221_3:2:60:590:1760
+TCATCAAGATATGTAGTCATCAGACTATCTAAAGT
+>EAS221_3:2:67:1467:1447
+AAAGTTCAATACTCACCATCATAAATACACACAAA
+>EAS221_3:2:67:1467:1447
+ATAGATTGGCAGAACAGATTTAAAAACATGAACTA
+>EAS221_3:2:67:1864:477
+AATGATGCCCCTTGGCCATCACCCAGTCCCTGCCC
+>EAS221_3:2:67:1864:477
+TAAAATGTCTATTTTTGTCTTGACACCCAACTAAT
+>EAS221_3:2:76:1729:813
+TAATTTCGTTTTTTTTTTTTTCTTTTCTCTTTTTT
+>EAS221_3:2:76:1729:813
+TCTGCGCTTGTACTTCTAAATCTATAAAAAAATTA
+>EAS221_3:4:12:276:1797
+ACTATTGCCAGATGAACCACACATTAATACTATGT
+>EAS221_3:4:12:276:1797
+TCTGTGTAAAGTCTCAGGGAGCCGTCCGTGTCCTC
+>EAS221_3:4:21:132:1423
+GCCAGATGAACCACACATTAATACTATGTTTCTTA
+>EAS221_3:4:21:132:1423
+TCAATTTGGTGTTCTGTGTAAAGTCTCAGGGAGCC
+>EAS221_3:4:29:1061:574
+ATGTGTTTATTACCAGAGGGATGGAGGGAAGAGGG
+>EAS221_3:4:29:1061:574
+GGTCTGACAGGCTGCAACTGTGAGCCATCACAATG
+>EAS221_3:4:30:1452:1563
+ATGAATTAACCAAGTCAGACAAAAATAAAGAAAAA
+>EAS221_3:4:30:1452:1563
+GATTTAGACATCTAAATGAAAGAGGCTCAAAGAAT
+>EAS221_3:4:41:1308:619
+CATTGCAAGACAGACTTCATCAAGATATGTAGTCA
+>EAS221_3:4:41:1308:619
+GAAGTATGAGATTATGTAAAGTAACTGAACCTATG
+>EAS221_3:4:57:1675:720
+TATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAA
+>EAS221_3:4:57:1675:720
+TCATATATCAATATTAACTTTGAATAAAAAGGGAT
+>EAS221_3:4:66:584:407
+GCATTTGCCTTCAGACCCTACACGAATGCGTCTCT
+>EAS221_3:4:66:584:407
+GGGCAATACATGAGATTATTAGGAAATGCTTTACT
+>EAS221_3:4:78:1314:1275
+AGGAAATCCCATCAGAATAACAATGGGCTTCTCAG
+>EAS221_3:4:78:1314:1275
+GAAGTAATTGGGGAAAACCTCTTTAGTCTTGCTAG
+>EAS221_3:4:81:687:1379
+CCTAAGAGGGATGAGAAATTACCTAATTGGTACAA
+>EAS221_3:4:81:687:1379
+TAAATAAAACACATAGCTAAAACTAAAAAAGCAAA
+>EAS221_3:4:90:247:212
+ACTTCCAGCTCCCCAGAGGGAAAGCTTTCAACGCT
+>EAS221_3:4:90:247:212
+TGACAGGCTGCAACTGTGAGCCATCACAATGAACA
+>EAS221_3:6:20:492:850
+AGTATGAAAACAATGTTCCCCAGATGCCGTCCCGG
+>EAS221_3:6:20:492:850
+CCTGAGAGATTCTGCAGCCCAGCTCCAGATTGCTT
+>EAS221_3:6:26:227:1053
+ATTCTTCATCCTGGACCCTGAGAGATTCTGCAGCC
+>EAS221_3:6:26:227:1053
+GGTTTAGGGGTATAATACCTCTACATGGCTGATTA
+>EAS221_3:6:51:1486:1131
+ATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAA
+>EAS221_3:6:51:1486:1131
+TCAGCAGAAACCTTACAAGCCAGAAGAGATTGGAT
+>EAS221_3:6:70:843:706
+AATGATTCCTCAATGTTAAAATGTCTATTTTTGTC
+>EAS221_3:6:70:843:706
+ATCTCTTGTAATCTCTCTCCTTTTTGCTGCATCCC
+>EAS221_3:8:33:1240:846
+ACCTTACAAGCCAGAAGAGATTGGATCTAATTTTT
+>EAS221_3:8:33:1240:846
+ATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAA
+>EAS221_3:8:34:956:1309
+AACTATGAAGAGACTATTGCCAGATGAACCACACA
+>EAS221_3:8:34:956:1309
+AGCTTAGGTATCAATTTGGTGTTCTGTGTAAAGTC
+>EAS221_3:8:50:1203:1094
+AAATATAGTTGAAAGCTCTAACAATAGACTAAACC
+>EAS221_3:8:50:1203:1094
+ACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAAG
+>EAS221_3:8:55:932:613
+TAGTCATCAGACTATCTAAAGTCAACATGAAGGAA
+>EAS221_3:8:55:932:613
+TGTCAAACACGAATGTTATGCCCTGCTAAACTAAG
+>EAS221_3:8:63:1265:820
+CTCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTC
+>EAS221_3:8:63:1265:820
+TGTCTTGACACCCAACTAATATTTGTCTGAGCAAA
+>EAS221_3:8:65:463:703
+GAAACCTTACAAGCCAGAAGAGATTGGATCTAATT
+>EAS221_3:8:65:463:703
+TGCTCAAAAGAATTGTAAAAGTCAAAATTAAAGTT
+>EAS221_3:8:74:770:1712
+ACATTACTACCCTGCAATTAATATAATTGTGTCCA
+>EAS221_3:8:74:770:1712
+GAGCCGTCCGTGTCCTCCCATCTGGCCTCGTCCAC
+>EAS221_3:8:7:1864:1569
+AAAAACCTGTCAAACACGAATGTTATGCCCTGCTA
+>EAS221_3:8:7:1864:1569
+AGATATGTAGTCATCAGACTATCTAAAGTCAACAT
+>EAS51_62:1:38:250:647
+AATAATAAAATGATAAAAAGATCAATTCAGCAAGA
+>EAS51_62:1:38:250:647
+ACTATATGCTGTTTACAAGAAACTCATTAATAAAT
+>EAS51_62:2:133:8:379
+ATAAGATAATTCATCATCACTAAACCAGTCCTATA
+>EAS51_62:2:133:8:379
+GGAAATCCCATCAGAATAACAATGGGCTTCTCAGC
+>EAS51_62:2:258:266:101
+ACGCTGTCCTATGTACTTATCATGACTCTATCCCA
+>EAS51_62:2:258:266:101
+CCATCACCCAGTCCCTGCCCCATCTCTTGTAATCT
+>EAS51_62:2:260:147:818
+AAAATTTGGTAATTTAGTTTTTTTTTTTTTCTTTT
+>EAS51_62:2:260:147:818
+ATCCATGTAACAAATCTGCGCTTTTACTTCTAAAT
+>EAS51_62:3:103:443:166
+ACCTGTCAAACACGAATGTTATGCCCTGCTAAACT
+>EAS51_62:3:103:443:166
+TACACACAAAAGTACAAAACTCACAGGTTTTATAA
+>EAS51_62:3:169:292:652
+ATCTCTTGTAATCTCTCTCCTTTTTGCTGCATCCC
+>EAS51_62:3:169:292:652
+GCAAAACAGTCTAGATGAGAGAGAACTTCCCTGGA
+>EAS51_62:3:200:263:280
+AGTAAAATAAAACAAAGGAGGTCATCATACAATGA
+>EAS51_62:3:200:263:280
+TTGGTACAATGTACAATATTCTGATGATGGTTACA
+>EAS51_62:3:263:74:407
+AAGCTTTCAACGCTTCTAGCCATTTCTTTTGGCAT
+>EAS51_62:3:263:74:407
+CTGCAACTGTGAGCCATCACAATGAACAACAGGAA
+>EAS51_62:3:314:386:190
+AGACCCCCTTGCAACAACCTTGAGAACCCCAGGGA
+>EAS51_62:3:314:386:190
+CACTCCAGCTCCCTGTCACCCAATGGACCTGTGAT
+>EAS51_62:3:50:312:219
+ACTGCTACTCAATATATCCATGTAACAAATCTGCG
+>EAS51_62:3:50:312:219
+TCCTACTAAATACATATGCACCTAACACAAGACTA
+>EAS51_62:3:55:340:837
+TCCACACTGGTTCTCTTGAAAGCTTGGGCTGTAAT
+>EAS51_62:3:55:340:837
+TTAATATAATTGTGTCCATGTACACACGCTGTCCT
+>EAS51_62:3:68:996:104
+AGAGGGATGAGAAATTACCTAATTGGTACAATGTA
+>EAS51_62:3:68:996:104
+TACTTATATCAGATAAAGCACACTTTAAATCAACA
+>EAS51_62:4:156:857:494
+CTCATACACACACATGGTTTAGGGGTATAATACCT
+>EAS51_62:4:156:857:494
+GTTTCCCATCATGAAGCACTGAACTTCCACGTCTC
+>EAS51_62:4:187:907:145
+TTTCTTCTCTCTCTTTTTTTTTTTTTTTATTGCAT
+>EAS51_62:4:282:962:46
+GAAAAAAGTAAACTCTCAAATATTGCTAGTGGGAG
+>EAS51_62:4:282:962:46
+TACTTTACTGCTACTCAATATATCCATGTAACAAA
+>EAS51_62:4:308:614:911
+AAAAACAATTTGGTAATTTAGTTTTTTTTTTTTTC
+>EAS51_62:4:308:614:911
+TGCGCTTGTACTTCTAAATCTATAACAAAATTAAA
+>EAS51_62:5:119:38:945
+ATTCTAAAATCAGCAAGAGAAAAGCATACAGTCAT
+>EAS51_62:5:119:38:945
+TCACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGA
+>EAS51_62:5:131:779:345
+GTCCACACTGGTTCTCTTGAAAGCTTGGGCTGTAA
+>EAS51_62:5:131:779:345
+TGGAAGACATAATCCCACGCTTCCTATGGAAAGGT
+>EAS51_62:5:154:669:853
+GTGTAAAGTCTCAGGGAGCCGTCCGTGTCCTCCCA
+>EAS51_62:5:154:669:853
+TGGTTTTCTGTTTCTTTGTTTGATTTGGTGGAAGA
+>EAS51_62:5:192:716:235
+ATACACACACATGGTTTAGGGGTATAATACCTCTA
+>EAS51_62:5:192:716:235
+GACCCTACACGAATGCGTCTCTACCACAGGGGGCT
+>EAS51_62:5:236:498:526
+ACTAATGCGCTCCACGCCCAAGCCCTTCTCACAGT
+>EAS51_62:5:236:498:526
+CAGCACATTACTACCCTGCAATTAATATAATTGTG
+>EAS51_62:5:290:319:736
+CGAGTCACGGGGTTGCCAGCACAGGGGCTTAACCT
+>EAS51_62:5:290:319:736
+GTTCTCAAGGTTGTTGCAAGGGGGTCTATGTGAAC
+>EAS51_62:5:295:882:282
+AATGCTCAAAAGAATTGTAAAAGTCAAAATTAAAG
+>EAS51_62:5:295:882:282
+CAGAAGAGATTGGATCTAATTTTTGGACTTCTTAA
+>EAS51_62:5:86:697:941
+AAAAAAATCCCGGAAGATACATTGCAAGACAGACT
+>EAS51_62:5:86:697:941
+GTATGAGATTATGTAAAGTAACTGAACCTATGAGT
+>EAS51_62:6:12:484:836
+AAACACATAGCTAAAACTAAAAAAGCAAAAACAAA
+>EAS51_62:6:12:484:836
+AAATACTACTAGACCTAAGAGGGATGAGAAATTAC
+>EAS51_62:6:148:170:895
+AAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAGT
+>EAS51_62:6:148:170:895
+AAGAGGTTCAGAACTTGAAGACAAGTCTCTTATGA
+>EAS51_62:6:50:542:881
+CCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGA
+>EAS51_62:6:50:542:881
+TCCCAATTACGTCCTATCTTCTTCTTAGGTAAGAA
+>EAS51_62:7:144:28:475
+AGGAAATGCTTTACTGTCATAACTATGAAGAGACT
+>EAS51_62:7:144:28:475
+CAGCTTAGGTATCAATTTGGTGTTCTGTGTAAAGT
+>EAS51_62:7:157:784:936
+GCTTAGGTATCAATTTGGTGTTCTGTGTAAAGTCT
+>EAS51_62:7:157:784:936
+TGATTTACTTGTTGTTGGTTTTCTGTTTCTTTGTT
+>EAS51_62:7:162:195:761
+AACATGAACTAACTATATGCTGTTTACAAGAAACT
+>EAS51_62:7:162:195:761
+TCACAGGTTTTATAAAACAATTAATTGAGACTACA
+>EAS51_62:7:178:286:414
+CACGCCCAAGCCCTTCTCACAGTTTCTGCCCCCAG
+>EAS51_62:7:178:286:414
+TTACTTCCAGCTCCCCAGAGGGAAAGCTTTCAACG
+>EAS51_62:7:196:511:896
+ATTGGGGAAAACCTCTTTAGTCTTGCTAGAGATTT
+>EAS51_62:7:196:511:896
+GAAAAGCATACAGTCATCTATAAAGGAAATCCCAT
+>EAS51_62:7:248:17:435
+ATAACCATCCTACTAAATACATATGCACCTAACAC
+>EAS51_62:7:248:17:435
+CATGAGTTCAGGAAAAGGGGTGGAAAAAGATGTTC
+>EAS51_62:7:312:236:655
+GAGAAATTACCTAATTGGTACAATGTACAATATTC
+>EAS51_62:7:312:236:655
+TGCTAAGTATTGGTAAAGATGTGGGGAAAAAAGTA
+>EAS51_62:7:96:836:737
+ATCAACCTCATACACACACATGGTTTAGGGGTATA
+>EAS51_62:7:96:836:737
+TCCCATCATGAAGCACTGAACTTCCACGTCTCATC
+>EAS51_62:8:52:967:804
+AACTAAGCATCATAAATGAAGGGGAAATAAAGTCA
+>EAS51_62:8:52:967:804
+TTACCTAGTTGCTCTGTAGTCTCAATTAATTGTTT
+>EAS51_64:2:326:153:231
+ATTGTTTTCAACTTTGGAAAACAATTTGGTAATTT
+>EAS51_64:2:326:153:231
+TGCTACTCAATATATCCATGTAACAAATCTGCGCT
+>EAS51_64:3:143:310:958
+CTGCACATTACTACCCTGCAATTAATATAATTGTG
+>EAS51_64:3:143:310:958
+GTCTCATCTAGGGGAACAGGGAGGTGCACTAATGC
+>EAS51_64:3:190:727:308
+ACAGCTGTGTTTAGTGCCTTTGTTCAACCCCCTTG
+>EAS51_64:3:190:727:308
+GGTGCAGAGCCGAGTCACGGGGTTGCCAGCACAGG
+>EAS51_64:3:255:45:399
+AAAAACTATGCTAAGTATTGGTAAAGATGTGGGGA
+>EAS51_64:3:255:45:399
+GGTACAATGTACAATATTCTGATGATGGTTACACT
+>EAS51_64:3:285:417:147
+AGTCAAAATTAAAGTTCAATACTCACCATCATAAA
+>EAS51_64:3:285:417:147
+TTGGATCTAATTTTTGGACTTCTTAAAGAAAAAAA
+>EAS51_64:3:309:303:278
+CTATGAAGAGACTATTGCCAGATGAACCACACATT
+>EAS51_64:3:309:303:278
+TCAATTTGGTGTTCTGTGTAAAGTCTCAGGGAGCC
+>EAS51_64:3:67:782:132
+ATATTTGTCTGAGCAAAACAGTCTAGATGAGAGAG
+>EAS51_64:3:67:782:132
+TCTTCCTCTGTCTTGATTTACTTGTTGTTGGTTTT
+>EAS51_64:3:7:268:263
+TCGTACAGAAGTTTAATGGAGCCTTGGGACCTTAC
+>EAS51_64:3:7:268:263
+TTGCGTTATTTGAGTTGGTGGAAGACATAATCCCA
+>EAS51_64:3:80:885:513
+GAAATTCTTCATCCTGGACCCTGAGAGATTCTGCA
+>EAS51_64:3:80:885:513
+GCATTTTGTCAGTTACCAAATGTGTTTATTACCAG
+>EAS51_64:3:90:435:691
+GGGAATTTGTCAATGTCAGGGAAGGAGCATTTTGT
+>EAS51_64:3:90:435:691
+TCTGGTGACTGCCAGAGCTGCTGGCAAGCTAGAGT
+>EAS51_64:4:102:467:897
+AGCATGGTTGTACAGGGCAATACATGAGATTATTA
+>EAS51_64:4:102:467:897
+GCTTTCAACGCTTCTAGCCATTTCTTTTGTCTTTT
+>EAS51_64:4:116:738:142
+AAAGTTCAATACTCACCATCATAAATACACACAAA
+>EAS51_64:4:116:738:142
+TGTCAAACACGAATGTTATGCCCTGCTAAACTAAG
+>EAS51_64:4:163:31:455
+CTTACTTCCAGCTCCCCAGAGGGACAGCTNNCAAC
+>EAS51_64:4:163:31:455
+GGGAGGTGCACTAATGCGCTCCACGCCCAAGCCCT
+>EAS51_64:4:179:389:585
+TGGCCACTTTTTATCGCATTTCCCTTTAGAACCTA
+>EAS51_64:4:179:389:585
+TGTGAAATGAATGAGATTATTAGGAAATGCTTTAC
+>EAS51_64:4:181:476:394
+AACAATATTAACTTTGAATAAAAAGGGATTAAATT
+>EAS51_64:4:181:476:394
+TCATCACTAAACCAGTCCTATAAGAAATGCTCAAA
+>EAS51_64:4:189:467:475
+CTACATGGCTGATTATGAAAACAATGTTCCCCAGA
+>EAS51_64:4:189:467:475
+TATCTGGATTCTGGGAAATTCTTCATCCTGGACCC
+>EAS51_64:4:189:571:366
+AACAAATACTACTAGACCTAAGAGGGATGAGAAAT
+>EAS51_64:4:189:571:366
+TCTACGCAAACAGAAACCAAATGAGAGAAGGAGTA
+>EAS51_64:4:318:345:156
+GGACCCTGAGAGATTCTGCAGCCCAGATCCAGATT
+>EAS51_64:4:318:345:156
+TTATGAAAACAATGTTCCCCAGATACCATCCCTGT
+>EAS51_64:4:57:786:414
+CTCATTCACTCCAGCTCCCTGTCACCCAATGGACC
+>EAS51_64:4:57:786:414
+TGCAACAACCTTGAGAACCCCAGGGAATTTGTCAA
+>EAS51_64:5:177:24:389
+AGCTTTCAAGAAGTATGAGATTATGTAAAGTAACT
+>EAS51_64:5:177:24:389
+CAAGACAGACTTCATCAAGATATGTAGTCATCAGA
+>EAS51_64:5:202:39:380
+CAGGGAAGGAGCATTTTGTCAGTTACCAAATGTGT
+>EAS51_64:5:202:39:380
+CTTCATCCTGGACCCTGAGAGATTCTGCAGCCCAG
+>EAS51_64:5:290:247:509
+AGCCCATACTTTACTGCTACTCAATATATCCATGT
+>EAS51_64:5:290:247:509
+TCTCAAATATTGCTAGTGGGAGTATAAATTGTTTT
+>EAS51_64:6:118:41:489
+ACAATTAATTGAGACTACAGAGCAACTAGGTAAAA
+>EAS51_64:6:118:41:489
+CCCTGCTAAACTAAGCATCATAAATGAAGGGGAAA
+>EAS51_64:6:124:128:489
+CTCTGTCTTGATTTACTTGTTGTTGGTTTTCTGTT
+>EAS51_64:6:124:128:489
+GTCCTATCTTCTTCTTAGGGAAGAACAGCTTAGGT
+>EAS51_64:6:143:763:480
+AAACGCGTAACTGCGCTCTCATTCACTCCAGCTCC
+>EAS51_64:6:143:763:480
+CTGAAGGTTGTTGCAAGGGGGTCTATGTGAACAAA
+>EAS51_64:6:195:348:703
+CAGTTACCAAATGTGTTTATTACCAGAGGGATGGA
+>EAS51_64:6:195:348:703
+TAATGAAAACTATATTTATGCTATTCAGTTCTAAA
+>EAS51_64:6:206:994:556
+ATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGTC
+>EAS51_64:6:206:994:556
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+>EAS51_64:6:210:809:735
+AAACCTCTTTAGTCTTGCTAGAGATTTAGACATCT
+>EAS51_64:6:210:809:735
+GAGGTTCAGAACTTGAAGACAAGTCTCTTATGAAT
+>EAS51_64:6:213:54:878
+CTCTGTCTTGATTTACTTGTTGTTGGTTTTTTGTT
+>EAS51_64:6:300:622:86
+GGTTACACTAAAAGCCCATACTTTACTGCTACTCA
+>EAS51_64:6:300:622:86
+TCATACAATGATAAAAAGATCAATTCAGCAAGAAG
+>EAS51_64:6:54:695:952
+ACAGTAAAATAAAACAAAGGAGGTCATCATACAAT
+>EAS51_64:6:54:695:952
+GGTTACACTAAAAGCCCATACTTTACTGCTACTCA
+>EAS51_64:7:104:965:517
+AGAGGCTCAAAGAATGCCAGGAAGATACATTGCAA
+>EAS51_64:7:104:965:517
+NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN
+>EAS51_64:7:140:752:822
+CATCACTAAACCAGTCCTATAAGAAATGCTCAAAA
+>EAS51_64:7:140:752:822
+GCTTCTCAGCGGAAACCTTACAAGCCAGAAGAGAT
+>EAS51_64:7:152:918:824
+TACTATGTTTCTTATCTGCACATTACTACCCTGCA
+>EAS51_64:7:152:918:824
+TTTCCCATCATGAAGCACTGAACTTCCACGTCTCA
+>EAS51_64:7:92:493:891
+AAAAGAAAAAGTGAGAAGTTTGGAAAAACTATTTG
+>EAS51_64:7:92:493:891
+AGTCAACATGAAGGAAAAAAATTCTAAAATCAGCA
+>EAS51_66:1:282:274:50
+ATTACCAGAGGGATGGAGGGAAGAGGGACGCTGAA
+>EAS51_66:1:282:274:50
+CCCCTCTAAGCCGTTCTATTTGTAATGAAAACTAT
+>EAS51_66:1:289:207:323
+CCATACTTTACTGCTACTCAATATATCCATGTAAC
+>EAS51_66:1:289:207:323
+CTAGTGGGAGTATAAATTGATTTCCACTTTGGAAA
+>EAS51_66:1:64:182:741
+AAAAAAACAAATTAAACTCTAACAAAAGTAAATAA
+>EAS51_66:3:102:511:946
+ATGTAAAAGTGACTGTTATTGTCTTGACACCCAAC
+>EAS51_66:3:102:511:946
+CCCAGTCCCTGCCCCATCTCGGGTAATCTCTCTCC
+>EAS51_66:3:155:375:623
+AAGGAGTAGCTATACTTATATCAGATAAAGCACAC
+>EAS51_66:3:155:375:623
+CAATATTAACTTTGAATAAAAAGGGATTAAATTCC
+>EAS51_66:3:166:532:438
+AACAAATACTACTAGACCTAAGAGGGATGAGAAAT
+>EAS51_66:3:166:532:438
+AACTAAAAAAGCAAAAACAAAAACTATGCTAAGTA
+>EAS51_66:3:233:191:520
+TGAACTTCTGTAATTGAAAAATTCATTTAAGAAAT
+>EAS51_66:3:246:711:981
+AAAAAAACCTGTCAAACACGAATGTTATGCCCTGC
+>EAS51_66:3:246:711:981
+AGACTTCATCAAGATATGTAGTCATCAGACTATCT
+>EAS51_66:3:263:689:572
+AAAGAAAAAAAAACCTGTCAAACACGAATGTTATG
+>EAS51_66:3:263:689:572
+AAGATATGTAGTCATCAGACTATCTAAAGTCAACA
+>EAS51_66:3:29:381:169
+ATCTGGATTCTGGGAAATTCTTCATCCTGGACCCT
+>EAS51_66:3:29:381:169
+CATCAACCTCATACACACACATGGTTTAGGGGTAT
+>EAS51_66:3:39:59:738
+GAGATTATTAGGAAATGCTTTACTGTCATAATTAT
+>EAS51_66:3:39:59:738
+GTCCTATGTTCTTCTTAGGGAAGAACAGCTTAGGT
+>EAS51_66:4:188:460:1000
+GTGTCCATGTACACACGCTGTCCTATGTACTTATC
+>EAS51_66:4:188:460:1000
+TCTTGAAAGCTTGGGCTGTAATGATGCCCCTTGGC
+>EAS51_66:4:191:40:536
+ATAAAAAAAGACTACCCAGATTCATAAAACAAATA
+>EAS51_66:4:191:40:536
+CAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAA
+>EAS51_66:4:209:92:210
+GAGATTATTAGGAAATGCTTTACTGTCATAACTAT
+>EAS51_66:4:209:92:210
+TTAGGGAAGAACAGCTTAGGTATCAATTTGGTGTT
+>EAS51_66:4:240:264:231
+CAACAGATCAAGAAGGAGGGGCAATGGACGAGTTA
+>EAS51_66:4:240:264:231
+TGTAATGAAAACTATATTTATGCTATTCAGTTCTA
+>EAS51_66:4:277:482:316
+CACTAATGCGCTCCACGCCCAAGCCCTTCTCACAG
+>EAS51_66:4:277:482:316
+TGTCCTATGTACTTATCATGACTCTATCCCAAATT
+>EAS51_66:4:310:287:420
+AAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGAT
+>EAS51_66:4:310:287:420
+TACAGAGCAACTAGGTAAAAAATTAACATTACAAC
+>EAS51_66:4:322:350:374
+ACTTCTTAAAGAAAAAAAAACCTGTCAAACACGAA
+>EAS51_66:4:322:350:374
+CATTGCAAGACAGACTTCATCAAGATATGTAGTCA
+>EAS51_66:5:210:674:911
+TCATAAATACACACAAAAGTACAAAACTCACAGGT
+>EAS51_66:5:210:674:911
+TGGCAGAACAGATTTAAAAACATGAACTAACTATA
+>EAS51_66:5:269:280:716
+TGATATCTGGATTCTGGGAAATTCTTCATCCTGGA
+>EAS51_66:5:269:280:716
+TTTGTCAATGTCAGGGAAGGAGCATTTTTTCAGTT
+>EAS51_66:5:273:545:1001
+AACAAAGGAGGTCATCATACAATGATAAAAAGATC
+>EAS51_66:5:273:545:1001
+AGATTTAAAAACATGAACTAACTATATGCTGTTTA
+>EAS51_66:5:285:395:450
+GTCATCTATAAAGGAAATCCCATCAGAATAACAAT
+>EAS51_66:5:285:395:450
+TATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGT
+>EAS51_66:5:308:400:602
+ATAGACCCCCTTGCAACAACCTTGAGAACCCCAGG
+>EAS51_66:5:308:400:602
+CCTGTCACCCAATGGACCTGTGATATCTGGATTCT
+>EAS51_66:6:284:442:747
+AAAGAGGCTCAAAGAATGCCAGGAAGATACATTGC
+>EAS51_66:6:310:747:415
+TGAGTCACAGGTATTCCTGAGGAAAAAGAAAAAGT
+>EAS51_66:6:310:747:415
+TGTAGTCATCAGACTATCTAAAGTCAACATGAAGG
+>EAS51_66:7:174:987:334
+ACACCCAAGCCCTTCTCACAGTTTCTGCCCCCAGC
+>EAS51_66:7:174:987:334
+GTCCATGTACACACGCTGTCCTATGTACTTATCAT
+>EAS51_66:7:4:234:610
+AAAAAACCTGTCAAACACGAATGTTATGCCCTCCT
+>EAS51_66:7:4:234:610
+AAAAATCAACATCACAAATACACACAAAAGTACAA
+>EAS51_66:7:84:411:336
+GCTTGTACTTCTAAATCTATAAAAAAATTAAAATT
+>EAS51_66:8:36:688:722
+ATGTCTATTTTTGTCTTGACACCCAACTAATATTT
+>EAS51_66:8:36:688:722
+GTCCCTGCCCCATCTCTTGTAATCTCTCTCCTTTT
+>EAS51_66:8:43:972:506
+AGAAACCTTACAAGCCAGAAGAGATTGGATCTAAT
+>EAS51_66:8:43:972:506
+TAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAAT
+>EAS51_66:8:66:655:769
+TCATCCTGGACCCTGAGAGATTCTGCAGCCCAGCT
+>EAS51_66:8:66:655:769
+TTTGTCAGTTACCAAATGTGTTTATTACCAGAGGG
+>EAS51_66:8:9:80:353
+AATTAATATAATTGTGTCCATGTACACACGCTGTC
+>EAS51_66:8:9:80:353
+CCTCGTCCACACTGGTTCTCTTGAAAGCTTGGGCT
+>EAS51_78:7:113:43:634
+ATTTGTCTGAGAAAAACAGTCTAGATGAGAGAGAA
+>EAS51_78:7:113:43:634
+CTGTCTTCCTCTGTCTTGATTTACTTGTTGTTTTT
+>EAS51_78:7:147:64:416
+AAACAATGTCCCCCAGATACCATCCCTGTCTTACT
+>EAS51_78:7:147:64:416
+CTCATCTAGGGGAACAGGGAGGTGCACTAATGCGC
+>EAS51_78:7:164:727:977
+GAAATGCTCAAAAGAATTGTAAAAGTCAAAATTAA
+>EAS51_78:7:164:727:977
+TACAAGCCAGAAGAGATTGGATCTAATTTTTCGAC
+>EAS51_78:7:186:199:927
+CTACGCGAATGCGTCTCTACCACAGGGGGCTGCGC
+>EAS51_78:7:186:199:927
+TGGCATTTGCCTTCAGACCCTACACGAATGCGTCT
+>EAS51_78:7:215:516:299
+AAGCTATGCTAAGTATTGGTAAAGATGTGGGGAAA
+>EAS51_78:7:215:516:299
+AATTACCTAATTGGTACAATGTACAATATTCTGAT
+>EAS51_78:7:270:448:491
+GTAATCTCTCTCCTTTTTGCTGCATCCCTGTCTTC
+>EAS51_78:7:270:448:491
+TTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAAC
+>EAS51_78:7:303:402:142
+AGCATTTTGTCAGTTACCAAATGTGTTTATTACCA
+>EAS51_78:7:303:402:142
+TCCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGT
+>EAS51_78:7:316:961:576
+TGTGATATCTGGATTCTGGGAAATTCTTCATCCCG
+>EAS51_78:7:316:961:576
+TTACGGGTGTAATCTCTCTACATGGCTAATTATGA
+>EAS54_61:1:115:868:887
+CATACACACACATGGTTTAGGGGTATAATACCTCT
+>EAS54_61:1:115:868:887
+TCTCATTCACTCCAGCTCCCTGTCACCCAATGGAC
+>EAS54_61:2:168:61:867
+GATGTTCTACGCAAACAGAAACCAAATGAGAGAAG
+>EAS54_61:2:168:61:867
+TCATAAAACAAATACTACTAGACCTAAGAGGGATG
+>EAS54_61:2:66:757:918
+CCATCCTACTAAATACATATGCACCTAACACAAGA
+>EAS54_61:2:66:757:918
+GGGGTGGAAAAAGATGTTCTACGCAAACAGAAACC
+>EAS54_61:3:150:933:810
+AGCTTTCAACGCTTCTAGCCATTTCTTTTGGCATT
+>EAS54_61:3:150:933:810
+CAATGAACAACAGGAAGAAAAGGTCTTTCAAAAGG
+>EAS54_61:3:155:758:710
+ATCAGATAAAGCACACTTTAAATCAACAACAGTAA
+>EAS54_61:3:155:758:710
+TTTGAATAAAAAGGGATTAAATTCCCCCACTTAAG
+>EAS54_61:3:20:762:748
+CACAATGAACAACAGGAAGAAAAGGTCTTTCAAAA
+>EAS54_61:3:20:762:748
+TTCTTTTGGCATTTGCCTTCAGACCCTACACGAAT
+>EAS54_61:4:143:69:578
+ATTGGGAGCCCCTCTAAGCCGTTCTATTTGTAATG
+>EAS54_61:4:83:452:970
+AATGAAAACTATATTTATGCTATTCAGTTCTAAAT
+>EAS54_61:4:83:452:970
+AGGGATGGAGGGAAGAGGGACGCTGAAGAACTTTG
+>EAS54_61:4:86:660:932
+AATACATATGCACCTAACACAAGACTACCCAGATT
+>EAS54_61:4:86:660:932
+ATATAAAAAAATTAAAATTTAACAAAAGTAAATAA
+>EAS54_61:6:126:541:194
+AGTACGACCAGCTCCCCAGAGGGAAAGCTTTCAAC
+>EAS54_61:6:126:541:194
+CAGCCCAGATCCAGATTGCTTGTGGTCTGACAGGC
+>EAS54_61:6:25:949:33
+AAAGTAACTGAACCTATGAGTCACAGGTATTCCTG
+>EAS54_61:6:25:949:33
+GATATGTAGTCATCAGACTATCTAAAGTCAACATG
+>EAS54_61:7:114:506:971
+ACTAAATACATATGCACCTAACACAAGACTACCCA
+>EAS54_61:7:114:506:971
+GGGTGGAAAAAGATGTTCTACGCAAACAGAAACCA
+>EAS54_61:7:64:37:257
+CCGTGTCCTCCCATCTGGCCTCGTCCACACTGGTT
+>EAS54_61:7:64:37:257
+TTTGATTTGGTGGAAGACATAATCCCACGCTTCCT
+>EAS54_61:8:165:441:708
+CCATCATAAATACACACAAAAGTACAAAACTCACA
+>EAS54_61:8:165:441:708
+CTTAAAGAAAAAAAAACCTGTCAAACACGAATGTT
+>EAS54_61:8:4:173:814
+CTGCTACTCAATATATCCATGTAACAAATCTGCGC
+>EAS54_61:8:4:173:814
+GATAAAAAGATCAATTCAGCAAGAAGATATAACCA
+>EAS54_65:2:127:288:655
+ACTAGGTAAAAAATTAACATTACAACAGGAACAAA
+>EAS54_65:2:127:288:655
+TGTTCTACGCAAACAGAAACCAAATGAGAGAAGGA
+>EAS54_65:2:182:924:833
+TTTTTTTTTTTTTATTTGCGCTTTTTTTTTTTTTT
+>EAS54_65:2:264:157:150
+GGAAAAATGGACAAGATTCTGATGAGGGTTACACT
+>EAS54_65:2:264:157:150
+TAAAGCACACTTTAAATCAACAACAGTAAAATAAA
+>EAS54_65:2:94:356:809
+AAATCTATAACAAAATTAAAATTTAACAAAAGTAA
+>EAS54_65:2:94:356:809
+CTAAATACATATGCACCTAACACAAGACTACCCAG
+>EAS54_65:3:102:884:63
+GTCTTGACACCCAACTAATATTTGTCTGAGCAAAA
+>EAS54_65:3:102:884:63
+TGTCTTCCTCTGTCTTGATTTCCTTGTTGTTGGTT
+>EAS54_65:3:155:541:234
+CTAAATACATATGCACCTAACACAAGACTACCCAG
+>EAS54_65:3:155:541:234
+TGCGCTTGTACTTCTAAATCTATAAAAAAATTAAA
+>EAS54_65:3:214:946:229
+AAATGAACAGAGCTTTCAAGAAGTATGATATTATG
+>EAS54_65:3:214:946:229
+ACAAAGAATGCCAGGAAGATACATTGCAAGACAGA
+>EAS54_65:3:273:901:459
+CCAGCATGGTTGTACTGGGCAATACATGAGATTAT
+>EAS54_65:3:273:901:459
+TGTCCTATGTACTTATCATGACTCTATCCCAAATT
+>EAS54_65:3:290:558:349
+ACCATCCCTGTCTTACTTCCAGCTCCCCAGCGGGA
+>EAS54_65:3:290:558:349
+TCTCAGCTAGGGGAACAGGGAGGTGCACTAATGCG
+>EAS54_65:3:320:20:250
+AAATAAAACACATAGCTAAAACTAAAAAAGCAAAA
+>EAS54_65:3:320:20:250
+TTTTTTTTTTTTTTTTTTTTTTTGCATGCCAGAAA
+>EAS54_65:3:321:311:983
+ATTTATGCTATTCAGTTCTAAATATAGAAATTGAA
+>EAS54_65:3:326:652:890
+TTCTGTAATTGAAAAATTCATTTAAGAAATTACAA
+>EAS54_65:4:137:319:642
+CTTGTTGTTGGTTTTCTGTTTCTTTTTTTGATTTT
+>EAS54_65:4:174:753:617
+ATATATCCATGTAACAAATCTGCGCTTGTACTTCT
+>EAS54_65:4:174:753:617
+GATATAACCATCCTACTAAATACATATGCACCTAA
+>EAS54_65:4:192:714:341
+AAATTAAAATTTAACAAAAGTAAATAAAACACATA
+>EAS54_65:4:192:714:341
+ACACAAGACTACCCAGATTCATAAAACAAATACTA
+>EAS54_65:4:193:38:987
+AGGGAAGAACAGCTTAGGTATCAATTTTGTGTTCT
+>EAS54_65:4:193:38:987
+TGAGATTATTAGGAAATGCTTTACTGTCATAACTA
+>EAS54_65:4:246:313:499
+ACAATGAACAACAGGAAGAAAAGGTCTTTCAAAAG
+>EAS54_65:4:246:313:499
+CTTTAAACGCTTCTAGCCATTTCTTTTGGCATTTG
+>EAS54_65:4:325:795:213
+AGACTACAGAGCAACTAGGTAAAAAATTAACATTA
+>EAS54_65:4:325:795:213
+GGAAATAAAGTCAAGTCTTTCCTGACAAGCAAATG
+>EAS54_65:4:61:346:384
+CAACTAAGAAGAAACCTTACAAGCCAGAAGAGATT
+>EAS54_65:4:61:346:384
+CTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGAT
+>EAS54_65:4:91:267:655
+CAGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCGT
+>EAS54_65:4:91:267:655
+TGTTGTTGGTTTTCTGTTTCTTTGTTTGATTTGGT
+>EAS54_65:6:115:538:276
+CAAATGTGTTTATTACCAGAGGGATGGAGGGAAGA
+>EAS54_65:6:115:538:276
+TATTTGTAATGAAAACTATATTTATGCTATTCAGT
+>EAS54_65:6:164:797:930
+AGCTAGAGACCCATTTGGAGCCCCTCTAAGCCGTT
+>EAS54_65:6:164:797:930
+GTCAGGGAAGGAGCATTTTGTCAGTTACCAAATGT
+>EAS54_65:6:18:376:416
+GCAAAACAGTCTAGATGAGAGAGAACTTCCCTGGA
+>EAS54_65:6:18:376:416
+TTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGTTT
+>EAS54_65:6:277:590:364
+CTCTACATGGCTGATTATGAAAACAATGTTCCCCA
+>EAS54_65:6:277:590:364
+CTGGGAAATTCTTCATCCTGGACCCTGAGAGATTC
+>EAS54_65:6:326:71:741
+TCTCGTTTTTTTTTCTTTCTTTTCTCTTTTTTTTT
+>EAS54_65:6:49:183:435
+CCATCATGAAGCACTGAACTTCCACGTCTCATCTA
+>EAS54_65:6:49:183:435
+GACTATTGCCAGATGAACCACACATTAATACTATG
+>EAS54_65:6:67:56:806
+TATAAAGGAAATCCCATCAGAATAACAATGGGCTT
+>EAS54_65:6:67:56:806
+TCCTGACAAGCAAATGCTAAGATAATTCATCATCA
+>EAS54_65:7:117:452:744
+AATATTAACTTTGAATAAAAAGGGATTAAATTCCC
+>EAS54_65:7:117:452:744
+ACCAGTCCTATAAGAAATGCTCAAAAGAATTGTAA
+>EAS54_65:7:155:629:357
+AGAAGAGATTGGATCTAATTTTTGGACTTCTTAAA
+>EAS54_65:7:155:629:357
+AGAGGCTCAAAGAATGCCAGGAAGATACATTGCAA
+>EAS54_65:7:159:253:353
+ATAAAACAATTAATTGAGACTACAGAGCAACTAGG
+>EAS54_65:7:159:253:353
+GAAGGGGAAATAAAGTCAAGTCTTTCCTGACAGGC
+>EAS54_65:7:56:57:985
+TTCTGTCTTCTCTCCTGTCTTCTTTTCTCTTCTTT
+>EAS54_65:7:56:57:985
+TTTTTTCTCTTTTCTCTTTTTTTTTTTTTTTTTTT
+>EAS54_65:7:68:825:405
+AAAACCTCATATATCAATATTAACTTTGAATAAAA
+>EAS54_65:7:68:825:405
+AAACCAAATGAGAGAAGGAGTAGCTATACTTATAT
+>EAS54_65:8:10:975:766
+AATAACACAAGACTACCCAGATTCATAAAACAAAT
+>EAS54_65:8:10:975:766
+TTAATAAAGACATGAGTTCAGGTAAAGGGGTGAAA
+>EAS54_65:8:140:924:923
+GAACAACAGGAAGAAAAGGTCTTTCAAAAGGTGAT
+>EAS54_65:8:140:924:923
+TTTTAGCCATTTCTTTTGGCATTTGCCTTCAGACC
+>EAS54_65:8:147:687:428
+ATATGCACCTAACACAAGACTACCCAGATTCATAA
+>EAS54_65:8:147:687:428
+ATGTTCTACGCAAACAGAAACCAAATGAGAGAAGG
+>EAS54_65:8:178:187:610
+AAATACACACAAAAGTACAAAACTCACAGGTTTTA
+>EAS54_65:8:178:187:610
+TTGGCAGAACAGATTTAAAAACATGAACTAACTAT
+>EAS54_65:8:240:719:799
+AGATTGGCAGAACAGATTTAAAAACATGAACTAAC
+>EAS54_65:8:240:719:799
+TTAAAGTTCAATACTCACCATCATAAATACACACA
+>EAS54_65:8:305:819:245
+AAATTCATTTAAGAAATTACAAAATATAGTTGAAA
+>EAS54_65:8:76:493:708
+TTTATGCTATTCAGTTCTAAATATAGAAATTGAAA
+>EAS54_67:1:138:186:274
+GGCCTCGTCCACACTGGTTCTCTTGAAAGCTTGGG
+>EAS54_67:1:138:186:274
+TAATTGTGTCCATGTACACACGCTGTCCTATGTAC
+>EAS54_67:1:159:222:274
+GAACCACACATTAATACTATGTTTCTTATCTGCAC
+>EAS54_67:1:159:222:274
+GTCTGGGGAAAGTCTCAGGGAGCCGTCCGTGTCCT
+>EAS54_67:1:15:381:715
+GACTAAACCAAGCAGAAGAAAGAGGTTCAGAACTT
+>EAS54_67:1:15:381:715
+GGAAAAAGAAAAAGTGAGAAGTTTGGAAAAACTAT
+>EAS54_67:1:88:54:900
+ATCAACAACAGAAAAATAAAACAAAGGAGGTCATC
+>EAS54_67:1:88:54:900
+TGATGATGGTTACACTAAAAGCCCATACTTCACTG
+>EAS54_67:2:22:471:500
+GTAAATAAAACACATAGCTAAAACTAAAAAAGCAA
+>EAS54_67:2:22:471:500
+TACTACTAGACCTAAGAGGGATGAGAAATTACCTA
+>EAS54_67:3:114:736:433
+AACAAAACCTCATATATCAATATTAACTTTGAATA
+>EAS54_67:3:114:736:433
+ATGTTCTACGCAAACAGAAACCAAGTGAGAGAAGG
+>EAS54_67:3:172:196:746
+AAATAAAGTCAAGTCTTTCCTGACAAGCAAATGCT
+>EAS54_67:3:172:196:746
+GCATACAGTCATCTATAAAGGAAATCCCATCAGAA
+>EAS54_67:3:175:730:949
+TTATGCTATTCAGTTCTAAATATAGAAATTGAAAC
+>EAS54_67:3:197:261:624
+GACTATCTAAAGTCAACATGAAGGAAAAAAATTCT
+>EAS54_67:3:197:261:624
+GCCCTGCTAAACTAAGCATCATAAATGAAGGGGAA
+>EAS54_67:3:47:471:858
+ACACCCAACTAATATTTGTCTGAGCAAAACAGTCT
+>EAS54_67:3:47:471:858
+CATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTG
+>EAS54_67:4:142:943:582
+TTCAAATGAACTTCTGTAATTGAAAAATTCATTTA
+>EAS54_67:4:145:607:216
+AACGCGTAACTGCGCTCTCATTCACTCCAGCTCCC
+>EAS54_67:4:145:607:216
+TGAAAAACAGGAAGAAAAGGTCTTTCAAAAGGTGA
+>EAS54_67:4:7:526:343
+TCATCCTGGACCCTGAGAGATTCTGCAGCCCAGCT
+>EAS54_67:4:7:526:343
+TGAAAACAGTGTTCCCCAGATACCATCCCTGTCTT
+>EAS54_67:5:117:33:262
+AATTAACATTACAACAGGAACAAAACCTCATATAT
+>EAS54_67:5:117:33:262
+ACAAGCAAATGCTAAGATAATTCATCATCACTAAA
+>EAS54_67:5:124:241:608
+CTGAACTTCCACGTCTCATCTAGGGGAACAGGGAG
+>EAS54_67:5:124:241:608
+GGGTATAATACCTCTACATGGCTGATTATGAAAAC
+>EAS54_67:5:127:828:697
+ATGCCAGGAAGATACATTGCAAGACAGACTTCATC
+>EAS54_67:5:127:828:697
+TAAAGAAAAAAAAACCTGTCAAACACGAATGTTAT
+>EAS54_67:5:149:639:910
+CAATGGGCTTCTCAGCAGAAACCTTACAAGCCAGA
+>EAS54_67:5:149:639:910
+TCACTAAACCAGTCCTATAAGAAATGCTCAAAAGA
+>EAS54_67:5:71:408:741
+AGTCATCTATAAAGGAAATCCCATCAGAATAACAA
+>EAS54_67:5:71:408:741
+TCCTGACAAGCAAATGCTAAGATAATTCATCATCA
+>EAS54_67:6:107:395:312
+CAAAATATAGTTGAAAGCTCTAACAATAGACTAAA
+>EAS54_67:6:107:395:312
+CAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAAGT
+>EAS54_67:6:109:953:668
+CAATATATCCATGTAACAAATCTGCGCTTGTACTT
+>EAS54_67:6:109:953:668
+CCACTTTGGAAAACAATTTGGTAATTTCGTTTTTT
+>EAS54_67:6:198:503:669
+CAATGATAAAAAGATCAATTCAGCAAGAAGATATA
+>EAS54_67:6:198:503:669
+CAGATTTAAAAACATGAACTAACTATATGCTGTTT
+>EAS54_67:6:43:859:229
+TTCAAATGAACTTCTGTAATTGAAAAATTCATTTA
+>EAS54_67:6:46:285:790
+AACGCGTAACTGCGCTCTCATTCACTCCAGCTCCC
+>EAS54_67:6:46:285:790
+TCAAGGTTGTTGCAAGGGGGTCTATGTGAACAAAG
+>EAS54_67:7:101:752:996
+AACCTTACAAGCCAGAAGAGATTGGATCTAATTTT
+>EAS54_67:7:101:752:996
+AAGAAATGCTCAAAAGAATTGTAAAAGTCAAAATT
+>EAS54_67:7:197:399:319
+CAAAAAACAAATACTACTAGACCTAAGAGGGATGA
+>EAS54_67:7:197:399:319
+TAGAAACCAAATGAGAGAAGGAGTAGCTATACTTA
+>EAS54_67:8:19:855:491
+TGGCATTTGCCTTCAGACCCTACACGAATGCGTCT
+>EAS54_67:8:19:855:491
+TGTGTGTTCTCATCAACCTCATACACACACATGGT
+>EAS54_67:8:46:900:610
+GATATCTGGATTCTGGGAAATTCTTCATCCTGGAC
+>EAS54_67:8:46:900:610
+TACATGGCTGATTATGAAAACAATGTTCCCCAGAT
+>EAS54_71:2:125:628:79
+GCTGAAGAACTTTGATGCCCTCTTCTTCCAAAGA
+>EAS54_71:2:125:628:79
+TTTATGCTATTCAGTTCTAAATATAGAAATTGAAA
+>EAS54_71:2:204:264:413
+CAATGAACAACAGAAAGAAAAGTTCTTTCAAAAGG
+>EAS54_71:2:204:264:413
+TGCCCTCTTCTTCCAAAGATGAAACGCGTAACTG
+>EAS54_71:2:85:686:696
+AATCAGCAAGAGAAAAGCATACAGTCATCTATAAA
+>EAS54_71:2:85:686:696
+TAAACTAAGCATCATAAATGAAGTGGAAATAAAG
+>EAS54_71:3:186:989:869
+ACACACATGGTTTAGGGGTATAATACCTCTACATG
+>EAS54_71:3:186:989:869
+GGGAAATTCTTCATCCTGGACCCTGAGAGATTCT
+>EAS54_71:3:254:32:275
+GATGGAGGGAAGAGGGACGCTGAAGAACTTTGAT
+>EAS54_71:3:254:32:275
+TGCAACTGTGAGCCATCACAATGAACAACAGGAAG
+>EAS54_71:3:257:288:731
+AAGAAGATATAACCATCCTACTAAATACATATGCA
+>EAS54_71:3:257:288:731
+TGCTGTTTACAAGAAACTCATTAATAAAGACATG
+>EAS54_71:3:267:821:860
+GCATACAGTCATCTATAAAGGAAATCCCATCAGA
+>EAS54_71:3:267:821:860
+TCTTTAGTCTTGCTAGAGATTTAGACATCTAAATG
+>EAS54_71:3:78:855:352
+AAAAGAAAAAGTGAGAAGTTTGGAAAAACTATTT
+>EAS54_71:3:78:855:352
+AACAATAGACTAAACCAAGCAGAAGAAAGAGGTTC
+>EAS54_71:4:127:725:381
+AATTACAAAATATAGTTGAAAGCTCTAACAATAGA
+>EAS54_71:4:127:725:381
+TGAACCTATGAGTCACAGGTATTCCTGAGGAAAA
+>EAS54_71:4:13:981:659
+CGGGACAATGGACGAGGTAAACCGCACATTGACAA
+>EAS54_71:4:13:981:659
+TGTAGCCCCTCTAAGGCGTTCTATTTGTAATGAA
+>EAS54_71:4:14:88:306
+AAAGAATGCCAGGAAGATACATTGCAAGACAGAC
+>EAS54_71:4:14:88:306
+AGAAGAGATTAGATCTAATTTTTGGACTTCTTAAA
+>EAS54_71:4:165:397:25
+GCAACTGTGAGCCATCACAATGAACAACAGGAAGA
+>EAS54_71:4:165:397:25
+TTCAACGCTTCTAGCCATTTCTTTTGGCATTTGC
+>EAS54_71:4:169:256:888
+AGGTTCAGAACTTGAAGACAAGTCTCTTATGAATT
+>EAS54_71:4:169:256:888
+ATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGT
+>EAS54_71:4:169:862:829
+AAAGGTCTTTCAAAAGGTGATGTGTGTTCTCATCA
+>EAS54_71:4:169:862:829
+GCCATTTCTTTTGGCATTTGCCTTCAGACCCTAC
+>EAS54_71:4:206:741:810
+ACTAACTATATGCTGTTTACAAGAAACTCATTAA
+>EAS54_71:4:206:741:810
+CAAAAGTACAAAACTCACAGGTTTTATAAAACAAT
+>EAS54_71:4:209:159:130
+CTTATCATGACTCTATCCCAAATTCCCAATTACGT
+>EAS54_71:4:209:159:130
+GCCCCCAGCATGGTTGTACTGGGCAATACATGAG
+>EAS54_71:4:233:97:262
+ACCACACATTAATACTATGTTTCTTATCTGCCCA
+>EAS54_71:4:233:97:262
+GTATCAATTTGGTGTTCTGTGTAAAGTCTCAGGGA
+>EAS54_71:4:252:428:683
+TGTCTTGATTTACTTGTTGTTGGTTTTCTGTTTCT
+>EAS54_71:4:284:269:882
+TTTCTTTTCTCTTTTTTTTTTTTTTGTTTTTGCA
+>EAS54_71:4:328:669:662
+GGAAGGAGCATTTTGTCAGTTACCAAATGTGTTT
+>EAS54_71:4:328:669:662
+TCTTCATCCTGTACCCTGAGAGATTCTGCAGCCCA
+>EAS54_71:4:72:63:435
+CCTTGCAACAACCTTGAGAACCCCAGGGAATTTG
+>EAS54_71:4:72:63:435
+TGATATCTGGATTCTGGGAAATTCTTCATCCTGGA
+>EAS54_71:4:73:182:444
+AACTTCCCTGGAGGTCTGATGGCGTTTCTCCCTCG
+>EAS54_71:4:73:182:444
+CTTGATTTACTTGTTGTTGGTTTTCTGTTTCTTT
+>EAS54_71:5:153:543:671
+GCCCCATCTCTTGTAATCTCTCTCCTTTTTGCTG
+>EAS54_71:5:153:543:671
+TAAAATGTCTATTTTTGTCTTGACACCCAACTAAT
+>EAS54_71:5:16:434:204
+AGATGAGAGAGAACTTCCCTGGAGGTCTGATGGC
+>EAS54_71:5:16:434:204
+CTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGGT
+>EAS54_71:5:81:685:141
+ACTGAACCTATGAGTCACAGGTATTCCTGAGGAA
+>EAS54_71:5:81:685:141
+AGATATGTAGTCATCAGACTATCTAAAGTCAACAT
+>EAS54_71:6:172:896:83
+AGGAAGAAAAGGTCTTTCAAAAGGTGATGTGTGT
+>EAS54_71:6:172:896:83
+CATTTGCCTTCAGACCCTACACGAATGCGTCTCTA
+>EAS54_71:6:215:133:909
+TGTGTGTTCTCATCAACCTCATACACACACATGG
+>EAS54_71:6:215:133:909
+TTGCCTTCAGACCCTACACGAATGCGTCTCTACCA
+>EAS54_71:6:224:932:942
+CTCTTGAAAGCTTGGGCTGTAATGATGCCCCTTGG
+>EAS54_71:6:224:932:942
+GTCCATGTACACACGCTGTCCTATGTACTTATCA
+>EAS54_71:6:228:354:203
+AATGGACCTGTGATATCTGGATTCTGGGAAATTC
+>EAS54_71:6:228:354:203
+TCAACCTCATACACACACATGGTTTAGGGGTATAA
+>EAS54_71:6:264:705:89
+AAACATATGCACCTAACACAAGACTACCCAGATTC
+>EAS54_71:6:264:705:89
+AAGGGGTGGAAAAAGATGTTCTACGCAAACAGAA
+>EAS54_71:6:324:515:230
+AAAACAGTCTAGATGAGAGAGAACTTCCCTGGAG
+>EAS54_71:6:324:515:230
+CCTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGG
+>EAS54_71:6:82:932:400
+GACACCCAACTAATATTTGTCTGAGCAAAACAGTC
+>EAS54_71:6:82:932:400
+GTAATCTCTCTCCTCTTCGCTGCATCCCTGTCTT
+>EAS54_71:7:130:260:553
+AGCAAGAGAAAAGCATACAGTCATCTATAAAGGAA
+>EAS54_71:7:130:260:553
+GTGAGAAGTTTGGAAAAACTATTTGAGGAAGCAC
+>EAS54_71:7:194:867:616
+ATCCATGTAACAAATCTGCGCTTGTACTTCTATT
+>EAS54_71:7:194:867:616
+TTTTCCACTTTGGAAAACAATTTGGTAATTTCGTT
+>EAS54_71:7:212:329:348
+AACCACACATTAATACTATGTTTCTTATCTGCAC
+>EAS54_71:7:212:329:348
+CCCATCATGAAGCACTGAACTTCCACGTCTCATCT
+>EAS54_71:7:250:698:842
+AAAAAGTACAAAACTCACAGGTTTTATAAAACAA
+>EAS54_71:7:250:698:842
+AAGAAAAAAAAACCTGTCAAACACGAATGTTATGC
+>EAS54_71:7:80:760:490
+CATGGCTGATTATGAAAACAATGTTCCCCAGATAC
+>EAS54_71:7:80:760:490
+CTGGACCCTGAGAGATTCTGCAGCCCAGCTCCAG
+>EAS54_71:7:97:743:602
+AAGCAAATGCTAAGATAATTCATCATCACTAAACC
+>EAS54_71:7:97:743:602
+ATTACAACAGGAACAAAACCTCATATATCAATAT
+>EAS54_71:8:105:854:975
+ATTTAACAAAAGTAAATAAAACACATAGCTAAAAC
+>EAS54_71:8:105:854:975
+TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTG
+>EAS54_71:8:113:856:319
+AAATCAACAACAGTAAAATAAAACAAAGGAGGT
+>EAS54_71:8:113:856:319
+CCCACTTAAGAGATATAGATTGGCAGAACAGATTT
+>EAS54_71:8:215:830:609
+AAGACATCTAAATGAAAGAGGCTCAAAGAATGC
+>EAS54_71:8:234:21:950
+TTTTTTTTTTTTCTCCTCTCTTTTTTTTTTTTT
+>EAS54_71:8:321:642:388
+TACCAAATGTGTTTATTACCAGAGGGATGGAGG
+>EAS54_71:8:321:642:388
+TCCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGT
+>EAS54_71:8:38:856:336
+AATGGACCTGTGATATCTGGATTCTGGGAAATT
+>EAS54_71:8:38:856:336
+CACACATGGTTTAGGGGTATAATACCTCTACATGG
+>EAS54_73:3:203:419:243
+GGAAAAAGAAAAAGTGAGAAGTTTGGAAAAACTAT
+>EAS54_73:3:203:419:243
+TTGAAAGCTCTAACAATAGACTAAACCAAGCAGAA
+>EAS54_73:3:239:796:221
+ATAACTATGAAGAGACTATTGCCAGCTGACCCCCC
+>EAS54_73:3:239:796:221
+GGAAGAACAGCTTAGGTATCAATTTGGTGTTCTGT
+>EAS54_73:3:23:502:103
+AGTCCCTGCCCCATCTCTTGTAATCTCTCTCCTTT
+>EAS54_73:3:23:502:103
+GACACCCAACTAATATTTGTCTGAGCAAAACAGTC
+>EAS54_73:3:29:833:612
+AAGCTCTAACAATAGACTAAACCAAGCAGAAGAAA
+>EAS54_73:3:29:833:612
+CAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAAGT
+>EAS54_73:3:313:827:992
+AAAGTCTCAGGGAGCCGTCCGTGTCCTCCCATCTG
+>EAS54_73:3:313:827:992
+TGTTTCTTTGTTTGATTTGGTGGAAGACATAATCC
+>EAS54_73:3:37:761:635
+CCTCTTCTTCCAAAGATGAAACGCGTAACTGCGCT
+>EAS54_73:3:37:761:635
+TGTGAGCCATCACAATGAACAACAGGAAGAAAAGG
+>EAS54_73:3:4:854:140
+CCTGACAAGCAAATGCTAAGATAATTCATCATCAC
+>EAS54_73:3:4:854:140
+GTCATCTATAAAGGAAATCCCATCAGAATAACAAT
+>EAS54_73:3:88:24:744
+GTCCTGTGATATCTGGATTCTGGGAAATTCTTCAT
+>EAS54_73:3:88:24:744
+TGTCAATGTCAGGGAAGGAGCATTTTTGAAGTTTA
+>EAS54_73:5:145:635:390
+TAAACCAGTCCTATAAGAAATGCTCAAAAGAATTG
+>EAS54_73:5:145:635:390
+TTGAATAAAAAGGGATTAAATTCCCCCACTTAAGA
+>EAS54_73:5:169:714:644
+CCTAATTGGTACAATGTACAATATTCTGATGATGG
+>EAS54_73:5:169:714:644
+GAAAAAAGTAAACTCTCAAATATTGCTAGTGGGAG
+>EAS54_73:5:220:733:736
+CCATCCTACTAAATACATATGCACCTAACACAAGA
+>EAS54_73:5:220:733:736
+TTAATAAAGACATGAGTTCAGGTAAAGGGGTGGAA
+>EAS54_73:5:231:339:551
+CTGAGAGATTCTGCAGCCCAGATCCAGATTGCTTG
+>EAS54_73:5:231:339:551
+TGTCAGTTACCAAATGTGTTTATTACCAGAGGGAT
+>EAS54_73:5:255:796:239
+AAGGGATTAAATTCCCCCACTTAAGAGATAGAGAT
+>EAS54_73:5:255:796:239
+ATGCTCAAAAGAATTGTAAAAGTCAAAATTAAAGT
+>EAS54_73:5:263:557:988
+AATGATAAAAAGATCAATTCAGCAAGAAGATATAA
+>EAS54_73:5:263:557:988
+CTGCTACTCAATATATCCATGTAACAAATCTGCGC
+>EAS54_73:5:271:874:367
+AAAAAACCTGTCAAACACGAATGTTATGCCCTGCT
+>EAS54_73:5:271:874:367
+ATATGTAGTCATCAGACTATCTAAAGTCAACATTA
+>EAS54_73:5:3:233:911
+GTCTCATCTAGGGGAACAGGGAGGTGCACTAATGC
+>EAS54_73:5:3:233:911
+TGGCTGATTATGAAAACAATGTTCCCCAGATACCA
+>EAS54_73:5:44:498:945
+ATAGGGATGGAGGGAAGAGGGCCGCTGAAGAACTT
+>EAS54_73:5:44:498:945
+CCTATAAGCCGTTCTATTTGTAATGAAAACTATAT
+>EAS54_73:5:53:61:31
+AAGAAACTCATTAATAAAGACATGAGTTCAGATAA
+>EAS54_73:5:53:61:31
+CAATTCAGCAAGAAGATATAACCATCCTACTAAAT
+>EAS54_73:7:134:243:630
+ACATTACTACCCTGCAATTAATATAATTGTGTCCA
+>EAS54_73:7:134:243:630
+TCATCTAGGGGAACAGGGAGGCGCACTAATGAGCT
+>EAS54_73:7:200:65:291
+CAATACTCACCATCATAAATACACACAAAAGTACA
+>EAS54_73:7:200:65:291
+CTAACTATATGCTGTTTACAAGAAACTCATTAATA
+>EAS54_73:7:223:440:667
+AATTGGGGAAAACCTCTTTAGTCTTGCTAGAGATT
+>EAS54_73:7:223:440:667
+TTCAGAACTTGAAGACAAGTCTCTTATGAATTAAC
+>EAS54_73:7:254:572:431
+AAGAGATATAGATTGGCAGAACAGATTTAAAAACA
+>EAS54_73:7:254:572:431
+ATCAGATAAAGCACACTTTAAATCAACAACAGTAA
+>EAS54_73:7:63:854:610
+AATTTGGTGTTCTGTGTAAAGTCTCAGGGAGCCGT
+>EAS54_73:7:63:854:610
+GACTATTGCCAGATGAACCACACATTAATACTATG
+>EAS54_73:7:97:892:419
+AATAAAGTCAAGTCTTTCCTGACAAGCAAATGCTA
+>EAS54_73:7:97:892:419
+GCAACTAGGTAAAAAATTAACATTACAACAGGAAC
+>EAS54_81:2:128:394:455
+GTAATCTCTCTCCTTTTTGCTGCATCCCTGTCTTC
+>EAS54_81:2:128:394:455
+TTAGGGAAGAACAGCTTAGGTATCAATTTGGTGTT
+>EAS54_81:2:27:856:401
+ACCTCTACATGGCTGATTATGAAAACAATGTTCCC
+>EAS54_81:2:27:856:401
+TCATCTAGGGGAACAGGGAGGTGCACTAATGCGCT
+>EAS54_81:2:280:512:316
+GGGAAGAACAGCTTAGGTATCAATTTGGTGTTCTG
+>EAS54_81:2:280:512:316
+TTACTGTCATAACTATGAAGAGACTATTGCCAGCT
+>EAS54_81:2:285:367:932
+ATAGACCCCCTTGCAACAACCTTGAGAACCCCAGG
+>EAS54_81:2:285:367:932
+GCGTAACTGCGCTCTCATTCACTCCAGCTCCCTGT
+>EAS54_81:2:317:72:221
+AAAAAAATTCTAAAATCAGCAAGAGAAAAGCATAC
+>EAS54_81:2:317:72:221
+ATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGTC
+>EAS54_81:2:31:98:804
+CACGAATGCGTCTCTACCACAGGGGGCTGCGCGGC
+>EAS54_81:2:31:98:804
+CTTTACTGTCATAACTATGAAGAGACTATTGCCAG
+>EAS54_81:2:49:330:699
+AAAGTTCAATACTCACCATCATAAATACACACAAA
+>EAS54_81:2:49:330:699
+TTTTGGACTTCTTAAAGAAAAAAAAACCTGTCAAA
+>EAS54_81:2:5:491:391
+CCCTGCTCACAGTTTCTGCCCCCAGCATGGTTGTA
+>EAS54_81:2:5:491:391
+TTGGCATTTGCCTTCAGACCCTACACGAATGCGTC
+>EAS54_81:6:11:801:386
+AAAGGGATTAAATTCCCCCACTTAAGAGATATAGA
+>EAS54_81:6:11:801:386
+CACTATAAATCAACAACAGTAAAATAAAACAAAGG
+>EAS54_81:6:122:589:134
+AAAACCTGTCAAACACGAATGTTATGCCCTGCTAA
+>EAS54_81:6:122:589:134
+ACAGACTTCATCAAGATATGTAGTCATCAGACTAT
+>EAS54_81:6:199:511:426
+AATTCTTCATCCTGGACCCTGAGAGATTCTGCAGC
+>EAS54_81:6:199:511:426
+GGGGTATAATACCTCTACATGGCTGATTATGAAAA
+>EAS54_81:6:204:779:181
+AACAACAGGAAGAAAAGGTCTTTCAAAAGGTGATG
+>EAS54_81:6:204:779:181
+CTTTTGGCATTTGCCTTCAGACCCTACACGAATGC
+>EAS54_81:6:265:251:147
+AAAAGTACAAAACTCACAGGTTTTATAAAACAATT
+>EAS54_81:6:265:251:147
+TGTTATGCCCTGCTAAACTAAGCATCATAAATGAA
+>EAS54_81:6:273:424:207
+AAGAGATATAGATTGGCAGAACAGATTTAAAAACA
+>EAS54_81:6:273:424:207
+TAAATCAACAACAGTAAAATAAAACAAAGGAGGTC
+>EAS54_81:6:35:186:412
+ATAACCATCCTACTAAATACATATGCACCTAACAC
+>EAS54_81:6:35:186:412
+CATGTAACAAATCTGCGCTTGTACTTCTAAATCTA
+>EAS54_81:6:75:917:886
+ACTGGGCAATACATGAGATTATTAGGAAATGCTTT
+>EAS54_81:6:75:917:886
+TTATCATGACTCTATCCCAAATTCCCAATTACGTC
+>EAS54_81:7:124:253:889
+CTAAGCATCATAAATGAAGGGGAAATAAAGTCAAG
+>EAS54_81:7:124:253:889
+TCAGCAAGAGAAAAGCATACAGTCATCTATAAAGG
+>EAS54_81:7:166:979:531
+ATCCCACGCTTCCTATGGAAAGGTTGTTGGGAGAT
+>EAS54_81:7:166:979:531
+TGGTTCTCTTGAAAGCTTGGGCTGTAATGATGCCC
+>EAS54_81:7:226:869:36
+ATATATAAAGGAAATCCCATCAGAATAACAATGGG
+>EAS54_81:7:226:869:36
+TGAGGAAGTAATTGGGGAAAACCTCTTTAGTCTTG
+>EAS54_81:7:246:205:734
+CTCCAGGGAAGTTATCTCTCATCTAGANNNNNTTG
+>EAS54_81:7:246:205:734
+CTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGGT
+>EAS54_81:7:293:355:321
+GCTAGAGTCCCATTTGGAGCCCCTCTAAGCCGTTC
+>EAS54_81:7:293:355:321
+TTACCAAATGTGTTTATTACCAGAGGGATGGAGGG
+>EAS54_81:7:324:472:791
+AAAGCCAATACTTTACTGCTACTCAATATATCCAT
+>EAS54_81:7:324:472:791
+TGATAAAAAGATCAATTCAGCAAGAAGATATAACC
+>EAS54_81:7:325:150:465
+AACAACAGGAAGAAAAGGTCTTTCAAAAGGTGATG
+>EAS54_81:7:325:150:465
+TGATGCCCTCTTCTTCCAAAGATGAAACGCGTAAC
+>EAS54_81:7:74:596:137
+CTCTATCCCAAATTCCCAATTACGTCCTATCTTCT
+>EAS54_81:7:74:596:137
+GGTCCCTGCCCCATCGCTTGTAATCTCTCGCCTTT
+>EAS54_81:8:130:912:658
+TACACACACATGGTTTAGGGGTATAATACCTCTAC
+>EAS54_81:8:130:912:658
+TCCCATCATGAAGCACTGAACTTCCACGTCTCATC
+>EAS54_81:8:142:858:903
+ATGGTTGTACTGGGCAATACATGAGATTATTAGGA
+>EAS54_81:8:142:858:903
+CTATCCCAAATTCCCAATTACGTCCTATCTTCTTC
+>EAS54_81:8:14:360:580
+ACCCTACACGAATGCGTCTCTACCACAGGGGGCGG
+>EAS54_81:8:14:360:580
+ATGAGATTATTAGGAAATGCTTTACTGTCATAACT
+>EAS54_81:8:159:71:155
+AAAGGTTGTTGGGAGATTTTTAATGATTCCTCGAT
+>EAS54_81:8:159:71:155
+GTCCACACTGGTTCTCTTGAAAGCTTGGGCTGTAA
+>EAS54_81:8:177:800:714
+CTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGAT
+>EAS54_81:8:177:800:714
+TTCTCAGCAGAAACCTTACAAGCCAGAAGAGATTG
+>EAS54_81:8:271:180:509
+AATACTCACCATCATAAATACACACAAAAGTACAA
+>EAS54_81:8:271:180:509
+ATATAGATTGGCAGAACAGATTTAAAAACATGAAC
+>EAS54_81:8:40:925:442
+GAGGTTCAGAACTTGAAGACAAGTCTCTTATGAAT
+>EAS54_81:8:40:925:442
+TTTGAGGAAGTAATTGGGGAAAACCTCTTTAGTCT
+>EAS54_81:8:41:530:663
+AGAACAGATTTAAAAACATGAACTAACTATATGCT
+>EAS54_81:8:41:530:663
+ATACTCACCATCATAAATACACACAAAATTACAAA
+>EAS54_81:8:63:930:152
+ACTGGTTCTCTTGAAAGCTTGGGCTGTAATGATTC
+>EAS54_81:8:63:930:152
+ATCCCACGCTTCCTATGGAAAGGTTGTTGGGAGAT
+>EAS54_81:8:78:735:536
+TTTTTTTTTTTTTCATTTCTCTTTTTTTTTTTTTT
+>EAS56_53:1:124:243:35
+GCATATCCAGATTGCTGGTGGTCTGACAGGCAGCA
+>EAS56_53:1:124:243:35
+TGTGTTTATTACCAGAGGGATGGAGGGAAGAGCGA
+>EAS56_53:1:154:118:488
+AAAAGCATACAGTCATCTATAAAGGAAATCCCATC
+>EAS56_53:1:154:118:488
+AAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGA
+>EAS56_53:1:23:403:981
+TACTGTCATAACTATGAAGAGACTATTGCCAGATG
+>EAS56_53:1:23:403:981
+TCTTCATAGGGAAGAACAGCTTAGGTATCAATTTG
+>EAS56_53:1:47:303:887
+ACATTACTACCCTGCCATTAATATACTTGTGTCCA
+>EAS56_53:1:47:303:887
+CACACTGGTTCTCTTGAAAGCTTGGGCTGTAATGA
+>EAS56_53:1:92:875:345
+AAATGCTCAAAAGAATTGTAAAAGTCAAAATTAAA
+>EAS56_53:1:92:875:345
+CGGAAACCTTACAAGCCAGAAGAGATTGGATCTAA
+>EAS56_53:2:170:265:818
+GAGGGGAAGCTTTCAACGCTTCTAGCACTTTCTTT
+>EAS56_53:2:170:265:818
+TTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTG
+>EAS56_53:2:59:286:290
+AAAGGAAATCCCATCAGAATAACAATGGGCTTCTC
+>EAS56_53:2:59:286:290
+TCAAGTCTTTCCTGACAAGCAAATGCTAAGATAAT
+>EAS56_53:3:101:809:776
+GTACTTCTAAATCTATAAAAAAATTAAAATTTAAC
+>EAS56_53:3:101:809:776
+TATGCACCTAACACAAGACTACCCAGATTCATAAA
+>EAS56_53:3:107:738:484
+GGTCATCATACAATGATAAAAAGATCAATTCAGCA
+>EAS56_53:3:107:738:484
+TGAACTAACTATATGCTGTTTACAAGAAACTCATT
+>EAS56_53:3:126:558:408
+TTCTATTTGTAATGAAAACTATATTTATGCTATTC
+>EAS56_53:3:126:558:408
+TTTATTACCAGAGGGATGGAGGGAAGAGGGACGCT
+>EAS56_53:3:134:126:465
+AAAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAA
+>EAS56_53:3:134:126:465
+AAATCAGCAAGAGAAAAGCATACAGTCATCTATAA
+>EAS56_53:4:130:568:978
+TAAATATAGAAATTGAAACAGCTGTGTTTAGTGAC
+>EAS56_53:4:130:568:978
+TGAAACGCGAAACTGCACTCTCATTCACTCCAGCT
+>EAS56_53:4:153:977:200
+TCATCAACCTCATACACACACATGGTTTAGGGGTA
+>EAS56_53:4:153:977:200
+TGTCACCCAATGGACCTGTGATATCTGGATTCTGG
+>EAS56_53:4:154:762:630
+AGGAAGAAAAGGTCTTTCAAAAGGTGATGTGTGTT
+>EAS56_53:4:154:762:630
+CCTTCAGACCCTACACGAATGCGTCTCTACCACAG
+>EAS56_53:4:168:528:288
+CAGGCTGCAACTGTGAGCCATCACAATGAACAACA
+>EAS56_53:4:168:528:288
+GCTCCCCAGAGGGAAAGCTTTCAACGCTTCTAGCC
+>EAS56_53:4:45:707:147
+AAAAATTCTAAAATCAGCAAGAGAAAAGCATACAG
+>EAS56_53:4:45:707:147
+ATGAGTCACAGGTATTCCTGAGGAAAAAGAAAAAG
+>EAS56_53:6:180:695:621
+ATACAGTCATCTATAAAGGAAATCCCATCAGAATA
+>EAS56_53:6:180:695:621
+TACTGAAAAGCAAATGCTAAGATAATTCATCATCA
+>EAS56_53:7:22:22:934
+ATTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAA
+>EAS56_53:7:22:22:934
+CTGCATCCCTGTCTTCCTCTGTCTTGATTTACTTG
+>EAS56_53:8:122:430:882
+CCTACTAAATACATATGCACCTAACACAAGACTAC
+>EAS56_53:8:122:430:882
+CTATAAAAAAATTAAAATTTAACAAAAGTAAATAA
+>EAS56_53:8:179:549:753
+TACTACCCTGCAATTAATATAATTGTGTCCATGTA
+>EAS56_53:8:179:549:753
+TGTCCTCCCATCTGGCCTCGTCCACACTGGTTCTC
+>EAS56_53:8:28:701:724
+TGGACCCTGAGAGATTCTGCAGCCCAGCTCCAGAT
+>EAS56_53:8:28:701:724
+TTTTGTCAGTTACCAAATGTGTTTATTACCAGAGG
+>EAS56_57:1:122:38:103
+ATAAAACAATTAATTGAGACTACAGAGCAACTAGG
+>EAS56_57:1:122:38:103
+GAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCTAC
+>EAS56_57:1:125:884:276
+TTGCAAGACAGACTTCATCAAGATATGTAGTCATC
+>EAS56_57:1:125:884:276
+TTTGGACTTCTTAAAGAAAAAAAAACCTGTCAAAC
+>EAS56_57:1:189:130:136
+ATTTAGACATCTAAATGAAAGAGGCTCAAAGAATG
+>EAS56_57:1:189:130:136
+GGCTTCTCAGCAGAAACCTTACAAGCCAGAAGAGA
+>EAS56_57:1:189:503:110
+ATTCCTGAGGAAAAAGAAAAAGTGAGAAGTTTGGA
+>EAS56_57:1:189:503:110
+CTAACAATAGACTAAACCAAGCAGAAGAAAGAGTT
+>EAS56_57:1:228:182:717
+GGTCTGACAGGCTGCAACTGTGAGCCATCCCCATG
+>EAS56_57:1:228:182:717
+TCTTACTTCCAGCTCCCCAGAGGGAAAGCTTTCAA
+>EAS56_57:1:278:440:902
+AAGCACTGAACTTCCACGTCTCATCTAGGGGAACA
+>EAS56_57:1:278:440:902
+ATACTATGTTTCTTATCTGCACATTACTACCCTGC
+>EAS56_57:1:288:384:444
+TAACTTTGAATAAAAAGGGATTAAATTCCCCCACT
+>EAS56_57:1:288:384:444
+TCAAAAGAATTGTAAAAGTCAAAATTAAAGTTCAA
+>EAS56_57:2:158:909:321
+ATACAGTCATCTATAAAGGAAATCCCATCAGAATA
+>EAS56_57:2:158:909:321
+TTTGAGGAAGTAATTGGGGAAAACCTCTTTAGTCT
+>EAS56_57:2:178:192:499
+GTGAGCCATCACAATGAACAACAGGAAGAAAAGGT
+>EAS56_57:2:178:192:499
+TCTAGCCATTTCTTTTGGCATTTGCCTTCAGACCC
+>EAS56_57:2:206:873:186
+ACAATAGACTAAACCAAGCAGAAGAAAGAGGTTCA
+>EAS56_57:2:206:873:186
+GTATTCCTGAGGAAAAAGAAAAAGTGAGAAGTTTG
+>EAS56_57:2:236:841:20
+AAAGGAAATCCCATCAGAATAACAATGGGCTTCTC
+>EAS56_57:2:236:841:20
+GCTAAGATAATTCATCATCACTAAACCAGTCCTAT
+>EAS56_57:2:237:855:581
+CTAAACGCCCATACTTTACTGCTACTCAATATATC
+>EAS56_57:2:237:855:581
+TACAATGATAAAAAGATCAATTCAGCAAGAAGATA
+>EAS56_57:2:23:268:529
+TGAAAGAGGCTCAAAGAATGCCAGGAAGATACATT
+>EAS56_57:2:259:42:969
+GCTGTAATGATGCCCCTTGGCCATCACCCGGTCCC
+>EAS56_57:2:259:42:969
+GGAAAGGTTGTTGGGAGATTTTTAATGATTCCTCA
+>EAS56_57:2:262:297:601
+TGGACCTGTGATATCTGGATTCTGGGAAATTCTTC
+>EAS56_57:2:262:297:601
+TGTTCTCATCAACCTCATACACACACATGGTTTAG
+>EAS56_57:2:284:597:682
+AAAAAAAAACCTGTCAAACACGAATGTTATGCCCT
+>EAS56_57:2:284:597:682
+TTCATCAAGATATGTAGTCATCAGACTATCTAAAG
+>EAS56_57:2:44:153:969
+AAAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAA
+>EAS56_57:2:44:153:969
+AAAAGCATACAGTCATCTATAAAGGAAATCCCATC
+>EAS56_57:3:112:729:591
+ATTGGGGAAAACCTCTTTAGTCTTGCTAGAGATTT
+>EAS56_57:3:112:729:591
+GAAGAAAGAGGTTCAGAACTTGAAGACAAGTCTCT
+>EAS56_57:3:119:761:239
+CGTCTCTACCACAGGGGGCTGCGCGGTTTCCCATC
+>EAS56_57:3:119:761:239
+TGAAGAGACTATTGCCAGATGAACCACACATTAAT
+>EAS56_57:3:285:489:327
+AATAGACTAAACCAAGCAGAAGAAAGAGGTTCAGA
+>EAS56_57:3:285:489:327
+CTGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAAAA
+>EAS56_57:3:319:174:811
+CACTGGTTCTCTTGAAAGCTTGGGCTGTAATGATG
+>EAS56_57:3:319:174:811
+TTATCTGCACATTTCTACCCTGCAATTAATATAAT
+>EAS56_57:3:41:739:907
+CAGAAGAGATTGGATCTAATTTTTGGACTTCTTAA
+>EAS56_57:3:41:739:907
+GAATGCCAGGAAGATACATTGCAAGACAGACTTCA
+>EAS56_57:3:81:786:340
+TACTATGTTTCTTATCTGCACATTACTACCCTGCA
+>EAS56_57:3:81:786:340
+TCCACGTCTCATCTAGGGGAACAGAGAGGTGCACT
+>EAS56_57:4:233:478:792
+GCCTTCAGACCCTACACGAATGCGTCTCTACCACA
+>EAS56_57:4:233:478:792
+GTGTTCTCATCAACCTCATACACACACATGGTTTA
+>EAS56_57:4:262:965:756
+AGGGAGGTGCACTAATGCGCTCCACGCCCAAGCCC
+>EAS56_57:4:262:965:756
+TTAATATAATTGTGTCCATGTACACACGCTGTCCT
+>EAS56_57:4:71:707:568
+CTGCATCCCTGTCTTCCTCTGTCTTGATTTACTTG
+>EAS56_57:4:71:707:568
+GTCTAGATGAGAGAGAACTTCCCTGGAGGTCTGAT
+>EAS56_57:4:98:862:154
+AAAGATCAATTCAGCAAGAAGATATAACCATCCTA
+>EAS56_57:4:98:862:154
+TGCTACTCAATATATCCATGTAACAAATCTGCGCT
+>EAS56_57:5:105:521:563
+TATGTACTTATCATGACTCTATCCCAAATTCCCAA
+>EAS56_57:5:105:521:563
+TGTAATGCTGCCCCTTGGCCATCCCCCGGTCCCTG
+>EAS56_57:5:136:389:320
+TCCTATGTACTTATCATGACTCTATCCCAAATTCC
+>EAS56_57:5:136:389:320
+TTCTGCCCCCAGCATGGTTGTACTGGGCAATACAT
+>EAS56_57:5:145:383:182
+AAACCTCTTTAGTCTTGCTAGAGATTTAGACATCT
+>EAS56_57:5:145:383:182
+TTGAAGACAAGTCTCTTATGAATTAACCCAGTCAG
+>EAS56_57:5:207:926:427
+GGGAAGAACAGCTTAGGTATCAATTTGGTGTTCTG
+>EAS56_57:5:207:926:427
+TAGGAAATGCTTTACTGTCATAACTATGAAGAGAC
+>EAS56_57:5:214:644:390
+AAAAACATGAACTAACTATATGCTGTTTACAAGAA
+>EAS56_57:5:214:644:390
+AAATAAAACAAAGGAGGTCATGATACAATGATAAA
+>EAS56_57:5:24:284:360
+AGTCATCAGACTATCTAAAGTCAACATGAAGGAAA
+>EAS56_57:5:24:284:360
+CTGTCAAACACGAATGTTATGCCCTGCTAAACTAA
+>EAS56_57:5:266:133:789
+AAAAAATTAACATTACAACAGGAACAAAACCTCAT
+>EAS56_57:5:266:133:789
+GTGGAAAAAGATGTTCTACGCAAACAGAAACCAAA
+>EAS56_57:5:303:542:924
+AATAAAACAAAGGAGGTCATCATACAATGATAAAA
+>EAS56_57:5:303:542:924
+CAATGTACAATATTCTGATGATGGTTACACTAAAA
+>EAS56_57:5:309:109:987
+AACAAATACTACTAGACCTAAGAGGGATGAGAAAT
+>EAS56_57:5:309:109:987
+GAGAGAAGGAGTAGCTATACTTATATCAGATAAAG
+>EAS56_57:5:30:788:376
+ACGTCTCATCTAGGGGAACAGGGAGGTGCACTAAT
+>EAS56_57:5:30:788:376
+TGTTTCTTATCTGCACATTACTACCCTGCAATTAA
+>EAS56_57:5:324:728:956
+ATTTGTCAATGTCAGGGAAGGAGCATTTTGTCAGT
+>EAS56_57:5:324:728:956
+TGCTGGCAAGCTAGAGTCCCATTTGGAGCCCCTCT
+>EAS56_57:5:53:544:889
+AGGCTCAAAGAATGCCAGGAAGATACATTGCAAGA
+>EAS56_57:5:53:544:889
+GCTTCTCAGCGGAAACCTTACAAGCCAGAAGAGAT
+>EAS56_57:5:71:994:576
+AAGATAATTCATCATCACTAAACCAGTCCTATAAG
+>EAS56_57:5:71:994:576
+TAGGTAAAAAATTAACATTACAACAGGAACAAAAC
+>EAS56_57:6:145:144:796
+ATTTGGTGTTCTGTGTAAAGTCTCAGGGAGCCGTC
+>EAS56_57:6:145:144:796
+GGTTTTCTGTTTCTTTGTTTGATTTGGTGGAAGAC
+>EAS56_57:6:157:643:175
+GGGATGGAGGGAAGAGGGACGCTGAAGAACTTTGA
+>EAS56_57:6:157:643:175
+TTCTATTTGTAATGAAAACTATATTTATGCTATTC
+>EAS56_57:6:175:289:351
+CATCCTACTAAATACATATGCACCTAACACAAGAC
+>EAS56_57:6:175:289:351
+TGCGCTTGTACTTCTAAATCTATAAAAAAATTAAA
+>EAS56_57:6:190:289:82
+AGGGGTGCAGAGCCGAGTCACGGGGTTGCCAGCAC
+>EAS56_57:6:190:289:82
+CTCAAGGTTGTTGCAAGGGGGTCTATGTGAACAAA
+>EAS56_57:6:21:553:57
+AAATACTACTAGACCTAAGAGGGATGAGAAATTAC
+>EAS56_57:6:21:553:57
+AACAAAAGTAAATAAAACACATAGCTAAAACTAAA
+>EAS56_57:6:234:787:12
+AAGCTTGGGCTGTAATGATGCCCCTTGGCCATCAC
+>EAS56_57:6:234:787:12
+ACACGCTGGCCTATGTACTTATAATGACTCTATCC
+>EAS56_57:6:325:759:288
+GCTGCTGGCAAGCTAGAGTCCCATTTGGAGCCCCT
+>EAS56_57:6:325:759:288
+GGAGCATTTTGTCAGTTACCAAATGTGTTTATTAC
+>EAS56_57:6:44:280:641
+AACCCCCTTGCAACAACCTTGAGAACCCCAGGGAA
+>EAS56_57:6:44:280:641
+TCATTCACTCCAGCTCCCTGTCACCCAATGGACCT
+>EAS56_57:6:4:223:776
+AGTAACTGAACCTATGAGTCACAGGTATTCCTGAG
+>EAS56_57:6:4:223:776
+TGTAGTCATCAGACTATCTAAAGTCAACATGAAGG
+>EAS56_57:7:159:125:297
+GCAAGCTAGAGTCCCATTTGGAGCCACTCTAAGAC
+>EAS56_57:7:159:125:297
+GGAAGGAGCATTTTGTCAGTTACCAAATGTGTTTA
+>EAS56_57:7:247:522:670
+CTATCCCAAATTCCCAATTACGTCCTATCTTCTTC
+>EAS56_57:7:247:522:670
+TACATGAGATTATTAGGAAATGCTTTACTGTCATA
+>EAS56_57:7:273:562:954
+AAAGTTCAATACTCACCATCATAAATACACACAAA
+>EAS56_57:7:273:562:954
+TTTTTGGACTTCTTAAAGAAAAAAAAACCTGTCAA
+>EAS56_57:7:287:258:321
+TAATACTATGTTTCTTATCTGCACATTACTACCCT
+>EAS56_57:7:287:258:321
+TGTAAAGTCTCAGGGAGCCGTCCGTGTCCTCCCAT
+>EAS56_57:7:33:954:724
+CCTAAGAGGGATGAGAAATTACCTAATTGGTACAA
+>EAS56_57:7:33:954:724
+TCAGATAAAGCACACTTTAAATCAACAACAGTAAA
+>EAS56_57:7:57:826:977
+ATTGGATCTAATTTTTGGACTTCTTAAAGAAAAAA
+>EAS56_57:7:57:826:977
+TGCTCAAAAGAATTGTAAAAGTCAAAATTAAAGTT
+>EAS56_57:7:76:786:458
+GGAGCATTTTGTCAGTTACCAAATGTGTTTATTAC
+>EAS56_57:7:76:786:458
+TCTGGGAAATTCTTCATCCTGGACCCTGAGAGATT
+>EAS56_57:8:72:44:435
+AAAAGCAAAAACAAAAACTATGCTAAGTATTGGTA
+>EAS56_57:8:72:44:435
+ATTGGTACAATGTACAATATTCTGATGATGGTTAA
+>EAS56_59:1:126:526:276
+GAACTTCTGTAATTGAAAAATTCATTTAAGAAATT
+>EAS56_59:1:128:584:952
+ATCATACAATGATAAAAAGATCAATTCAGCAAGAA
+>EAS56_59:1:128:584:952
+GCCCATACTTTACTGCTACTCAATATATCCATGTA
+>EAS56_59:1:219:294:861
+CTGGTTCTCTTGAAAGCTTGGGCTGTAATGATGCC
+>EAS56_59:1:219:294:861
+TATAATTGTGTCCATGTACACACGCTGTCCTCTGT
+>EAS56_59:1:248:122:558
+AATGTACAATATTCTGATGATGGTTACACTAAAAG
+>EAS56_59:1:248:122:558
+GGAAAAAAGTAAACTCTCAAATATTGCTAGTGGGA
+>EAS56_59:1:278:906:933
+AATATAATTGTGTCCATGTACACACGCTGTCCTAT
+>EAS56_59:1:278:906:933
+TTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTG
+>EAS56_59:1:82:670:302
+AGTTCAGGTAAAGGGGTGGAAAAAGATGTTCTACG
+>EAS56_59:1:82:670:302
+TCCTACTAAATACATATGCACCTAACACAAGACTA
+>EAS56_59:1:93:490:901
+AGAAAAGCATACAGTCATCTATAAAGGAAATCCCA
+>EAS56_59:1:93:490:901
+GTAATTGGGGAAAACCTCTTTAGTCTTGCTAGAGA
+>EAS56_59:2:104:402:732
+AAATCAGCAAGAGAAAAGCATACAGTCATCTATAA
+>EAS56_59:2:104:402:732
+AATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACA
+>EAS56_59:2:162:272:415
+ACATGAACTAACTATATGCTGTTTACAAGAAACTC
+>EAS56_59:2:162:272:415
+ATAAAAAGATCAATTCAGCAAGAAGATATAACCAT
+>EAS56_59:2:177:266:842
+ACAACAGGAAGAAAAGGTCTTTCAAAAGGTGATGT
+>EAS56_59:2:177:266:842
+GGCATTTGCCTTCAGACCCTACACGAATGCGTCTC
+>EAS56_59:2:177:552:234
+ACGCTGTCCTATGTACTTATCATGACTCTATCCCA
+>EAS56_59:2:177:552:234
+GCTCCACGCCCAAGCCCTTCTCACAGTTTCTGCCC
+>EAS56_59:2:201:768:529
+AGGTATTCCTGAGGAAAAAGAAAAAGTGAGAAGTT
+>EAS56_59:2:201:768:529
+CAGACTATCTAAAGTCAACATGAAGGAAAAAAATT
+>EAS56_59:2:239:1001:406
+AGCATACAGTCATCTATAAAGGAAATCCCATCAGA
+>EAS56_59:2:239:1001:406
+CTTTCCTGACAAGCAAATGCTAAGATAATTCATCA
+>EAS56_59:2:60:677:921
+CATCAGACTATCTAAAGTCAACATGAAGGAAAAAA
+>EAS56_59:2:60:677:921
+GTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAAT
+>EAS56_59:3:149:953:349
+AGCCCTTCTCACAGTTTCTGCCCCCAGCATGGTTG
+>EAS56_59:3:149:953:349
+TTCTTTTGGCATTTGCCTTCAGACCCTACACGAAT
+>EAS56_59:3:166:626:836
+AGTACAAAACTCACAGGTTTTATAAAACAATTAAT
+>EAS56_59:3:166:626:836
+CTGTCAAACACGAATGTTATGCCCTGCTAAACTAA
+>EAS56_59:3:182:1002:639
+AAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAG
+>EAS56_59:3:182:1002:639
+AACCAAGCAGAAGAAAGAGGTTCAGAACTTGAAGA
+>EAS56_59:3:316:25:230
+GATGCCCCTTGGCCATCACCCGGTCCCTGCCCCAT
+>EAS56_59:3:316:25:230
+TGTCCTATGTACTTATCATGACTCTATCCCAAATT
+>EAS56_59:4:119:651:88
+GATTGGATCTAATTTTTGGACTTCTTAAAGAAAAA
+>EAS56_59:4:119:651:88
+GCTCAAAGAATGCCAGGAAGATACATTGCAAGACA
+>EAS56_59:4:262:928:237
+TGAGTTCAGGTAAAGGTGTGGAAAAAGATGTTCTA
+>EAS56_59:4:262:928:237
+TTGAGACTACAGAGCAACTAGGTAAAAAATTAACA
+>EAS56_59:4:267:394:437
+AAACATCATAAATACACACAAAAGTACAAAACTCA
+>EAS56_59:4:267:394:437
+GGACTTCTTAAAGAAAAAAAAACCTGTCAAACACG
+>EAS56_59:4:278:524:521
+CACATTAATACTATGTTTCTTATCTGCACATTACT
+>EAS56_59:4:278:524:521
+CCCATCTGGCCTCGTCCACACTGGTTCTCTTGAAA
+>EAS56_59:4:329:577:757
+AACGCGTAACTGCGCTCTCATTCACTCCAGCTCCC
+>EAS56_59:4:329:577:757
+TCAAGGTTGTTGCAAGGGGGTCTATGTGAACAAAG
+>EAS56_59:5:113:694:725
+CTGTAATGATGCCCCTTGGCCATCACCCGGTCCCT
+>EAS56_59:5:113:694:725
+GTACACACGCTGTCCTATGTACTTATCATGACTCT
+>EAS56_59:5:125:137:58
+AACTATATGCTGTTTACAAGAAACTCATTAATAAA
+>EAS56_59:5:125:137:58
+GGTTTTATAAAACAATTAATTGAGACTACAGAGCA
+>EAS56_59:5:181:713:140
+AGGGGAAATAAAGTCAAGTATTTCCTGACAAGCAA
+>EAS56_59:5:181:713:140
+CTACAGAGCAACAAGGTAAAAAATTAACATTACAA
+>EAS56_59:5:198:929:684
+AAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGA
+>EAS56_59:5:198:929:684
+GAAATCCCATCAGAATAACAATGGGCTTCTCAGCA
+>EAS56_59:5:232:336:46
+ATTTTTTTTTTTTCTTTTCTCTTGTTTCTTTTTTT
+>EAS56_59:5:325:544:349
+AAAATTAAAGTTCAATACTCACCATCATAAATACA
+>EAS56_59:5:325:544:349
+CAAGCCAGAAGAGATTGGATCTAATTTTTGGACTT
+>EAS56_59:5:90:629:652
+AGCCCATACTTTACTGCTACTCAATATATCCATGT
+>EAS56_59:5:90:629:652
+ATATTGCTAGTGGGAGTATAAATTGTTTTCCACTT
+>EAS56_59:6:187:925:547
+GGCTGATTATGAAAACAATGTTCCCAAGATACCAT
+>EAS56_59:6:187:925:547
+TGAACTTCCACGTCTCATCTAGGGGAACAGGGAGG
+>EAS56_59:6:199:327:965
+ATCTGGATTCTGGGAAATTCTTCATCCTGGACCCT
+>EAS56_59:6:199:327:965
+NCAACAACCTTGAGAACCCCAGGGAATTTGTCAAT
+>EAS56_59:6:227:657:95
+GTAATTGGGGAAAACCTCTTTAGTCTTGCTAGAGA
+>EAS56_59:6:227:657:95
+GTCATCTATAAAGGAAATCCCATCAGAATAACAAT
+>EAS56_59:6:286:753:854
+TCACCCAGTCCCTGCCCCATCTCTTGTAATCTCTC
+>EAS56_59:6:286:753:854
+TTATCATGACTCTATCCCAAATTCCCAATTACGTC
+>EAS56_59:6:312:837:406
+AGGTGCACTAATGCGCTCCACGCCCAAGCCCTTCT
+>EAS56_59:6:312:837:406
+CTGCACATTACTACCCTGCAATTAATATAATTGTG
+>EAS56_59:6:3:186:68
+AAGAAATGCTCAAAAGAATTGTAAAAGTCAAAATT
+>EAS56_59:6:3:186:68
+TTACAAGCCAGAAGAGATTGGATCTAATTTTTGTA
+>EAS56_59:6:89:457:591
+ATGCCCTGCTAAACTAAGCATCATAAATGAAGGGG
+>EAS56_59:6:89:457:591
+CAGGTTTTATAAAACAATTAATTGAGACTACATAG
+>EAS56_59:7:260:985:520
+TCCCTGTCACCCAATGGACCTGTGATATCTGGATT
+>EAS56_59:7:260:985:520
+TGCAACAACCTTGAGAACCCCAGGGAATTTGTCAA
+>EAS56_59:7:318:679:883
+GATTTAGACATCTAAATGAAAGAGGCTCAAAGAAT
+>EAS56_59:7:319:246:304
+CTTGAGAACCCCAGGGAATTTGTCAATGTCAGGGA
+>EAS56_59:7:319:246:304
+TGTCACCCAATGGACCTGTGATATCTGGATTCTGG
+>EAS56_59:7:82:902:868
+CTGTCACCCAATGGACCTGTGATATCTGGATTCTG
+>EAS56_59:7:82:902:868
+TTGCAACAACCTTGAGAACCCCAGGGAATTTGTCA
+>EAS56_59:8:49:182:192
+ACACAAAAGTACAAAACTCACAGGTTTTATAAAAC
+>EAS56_59:8:49:182:192
+GTTATGCCCTGCTAAACTGAGCATCATAAATGAAG
+>EAS56_59:8:80:542:549
+AGTAAACTCTCAAATATTGCTAGTGGGAGTATAAA
+>EAS56_59:8:80:542:549
+CTAAAAGCCCATACTTTACTGCTACTCAATATATC
+>EAS56_61:1:119:880:781
+ACAAATCTGCGCTTGTACTTCTAAATCTATAACAA
+>EAS56_61:1:119:880:781
+ACATATGCACCTAACACAAGACTACCCAGATTCAT
+>EAS56_61:1:210:880:606
+GGAGCATTTTGTCAGTTACCAAATGTGTTTATTAT
+>EAS56_61:1:210:880:606
+TCCTGGACCCTGAGAGATTCTGCAGCCCAGCTCCA
+>EAS56_61:1:303:184:14
+CAACCTTGAGAACCCCAGGGAATTTGTCAATGTCA
+>EAS56_61:1:303:184:14
+CAATGGACCTGTGATATCTGGATTCTGGGAAATTC
+>EAS56_61:2:152:860:286
+AGACTATTGCCAGATGAACCACACATTAATACTAT
+>EAS56_61:2:152:860:286
+TTAGGTATCAATTTGGTGTTCTGTGTAAAGTCTCA
+>EAS56_61:3:140:522:212
+CGCTGAAGAACTTTGATGCCCTCTTCTTCCAAAGA
+>EAS56_61:3:140:522:212
+GACAGGCTGCAACTGTGAGCCATCACAATGAACAA
+>EAS56_61:3:165:665:220
+ACAATTAATTGAGACTACAGAGCAACTAGGTAAAA
+>EAS56_61:3:165:665:220
+GGAAATAAAGTCAAGTCTTTCCTGACAAGCAAATG
+>EAS56_61:3:208:118:673
+AAGAGGCTCAAAGAATGCCAGGAAGATACATTGCA
+>EAS56_61:3:208:118:673
+GAACAGAGCTTTCAAGAAGTATGAGATTATGTAAA
+>EAS56_61:3:260:827:289
+AAACCTCATATATCAATATTAACTTTGAATAAAAA
+>EAS56_61:3:260:827:289
+TGTTCTACGCAAACAGAAACCAAATGAGAGAAGGA
+>EAS56_61:3:45:758:616
+ATAAATTGTTTTCCACTTTGGAAAACAATTTGGTA
+>EAS56_61:3:45:758:616
+CATACTTTACTGCTACTCAATATATCCATGTAACA
+>EAS56_61:3:5:45:441
+TTTTTTTTCTTTTCTCTTTTTTTTTTTTTTTTTTT
+>EAS56_61:4:262:456:74
+TACCAAATGTGTTTATTACCAGAGGGATGGAGGGA
+>EAS56_61:4:262:456:74
+TGGGAAATTCTTCATCCTGGACCCTGAGAGATTCT
+>EAS56_61:5:194:470:416
+AAACTATTTGAGGAAGTAATTGGGGAAAACCTCTT
+>EAS56_61:5:194:470:416
+TCAGAACTTGAAGACAAGTCTCTTATGAATTAACC
+>EAS56_61:5:209:824:866
+ATCATCACTAAACCAGTCCTATAAGAAATGCTCAA
+>EAS56_61:5:209:824:866
+CAGCAACAAAACCTCATATATCAATATTAACTTTG
+>EAS56_61:5:263:314:696
+AACTCATTAATAAAGACATGAGTTCAGGTAAAGGG
+>EAS56_61:5:263:314:696
+AAGATCAATTCAGCAAGAAGATATAACCATCCTAC
+>EAS56_61:5:272:240:950
+CAGCAGAGCTTGGATCTAATTTTTGGACTTCTTCA
+>EAS56_61:5:272:240:950
+TCAAAAGAATTGTAAAAGTCAAAATTAAAGTTCAA
+>EAS56_61:6:10:106:737
+ACAAGAAACTCATTAATAAAGACATGAGTTCAGGT
+>EAS56_61:6:10:106:737
+ACAATGATAAAAAGATCAATTCAGCAAGAAGATAT
+>EAS56_61:6:160:272:398
+AATGTTCCCCAGATACCATCCCTGTCTTACTTCCA
+>EAS56_61:6:160:272:398
+GTGCACTAATGCGCTCCACGCCCAAGCCCTTCTCA
+>EAS56_61:6:226:370:91
+AGAATAACAATGGGCTTCTCAGCGGAAACCTTACA
+>EAS56_61:6:226:370:91
+AGGCTCAAAGAATGCCAGGAAGATACATTGCAAGA
+>EAS56_61:6:227:259:597
+AATATAGAAATTGAAACAGCTGTGTTTAGTGCCTT
+>EAS56_61:6:256:67:461
+TCATGTTTGTGTCTTTCTATGCATTTTTTTTTTTT
+>EAS56_61:6:256:67:461
+TTGTTTTTTCTTCTTTTCTCTTTTTTTTTTTTTTT
+>EAS56_61:6:283:963:234
+AAAAAGATGTTCTACGCAAACAGAAACCAAATGAG
+>EAS56_61:6:283:963:234
+ACATATGCACCTAACACAAGACTACCCAGATTCAT
+>EAS56_61:6:307:208:477
+AAAGTCAAAATTAAAGTTCAATACTCACCATCATA
+>EAS56_61:6:307:208:477
+ACTTCTTAAAGAAAAAAAAACCTGTCAAACACGAA
+>EAS56_61:7:280:133:495
+AGATGAACCACACATTAATACTATGTTTCTTATCT
+>EAS56_61:7:280:133:495
+CCATCATGAAGCACTGAACTTCCACGTCTCATCTA
+>EAS56_61:7:41:745:603
+CATTGCAAGACAGACTTCATCAAGATATGTAGTCA
+>EAS56_61:7:41:745:603
+TAATTTTTGGACTTCTTAAAGAAAAAAAAACCTGT
+>EAS56_61:7:7:682:201
+CATACAGTCATCTATAAAGGAAATCCCATCAGAAT
+>EAS56_61:7:7:682:201
+GGAAAACCTCTTTAGTCTTGCTAGAGATTTAGACA
+>EAS56_61:8:60:358:494
+GGTAAAGGGGTGGAAAAAGATGTTCTACGCAAACA
+>EAS56_61:8:60:358:494
+TACCCAGATTCATAAAACAAATACTACTAGACCTA
+>EAS56_61:8:7:171:402
+GTGATATCTGGATTCTGGGAAATTCTTCATCCTGG
+>EAS56_61:8:7:171:402
+TCTACATGGCTGATTATGAAAACAATGTTCCCCAG
+>EAS56_63:1:119:446:185
+TGGTCTGACAGGCTGCAACTGTGAGCCATCACAAT
+>EAS56_63:1:119:446:185
+TTACCAGAGGGATGGAGGGAAGAGGGACGCTGAAG
+>EAS56_63:1:145:71:26
+CTAGGGGAACAGGGAGGTGCACTAATGCGCTCCAC
+>EAS56_63:1:145:71:26
+TTTCTTATCTGCACATTACTACCCTGCAATTAATA
+>EAS56_63:2:119:161:322
+ATCAGAATAACAATGGGCTTCTCAGCAGAAACCTT
+>EAS56_63:2:119:161:322
+CTTTAGTCTTGCTAGAGATTTAGACATCTAAATGA
+>EAS56_63:2:33:357:858
+AAAATTAAAGTTCAATACTCACCATCATAAATACA
+>EAS56_63:2:33:357:858
+AGGGATTAAATTCCCCCACTTAAGAGATATAGATT
+>EAS56_63:2:74:656:272
+AACAAAGGAGGTCATCATACAATGATAAAAAGATC
+>EAS56_63:2:74:656:272
+TGTACAATATTCTGATGATGGTTACACTAAAAGCC
+>EAS56_63:3:40:594:752
+ATACACACACATGGTTTAGGGGTATAATACCTCTA
+>EAS56_63:3:40:594:752
+CTGCGCGGTTTCCCATCATGAAGCACTGAACTTCC
+>EAS56_63:3:41:468:459
+AAATTTAACAAAAGTAAATAAAACACATAGCTAAA
+>EAS56_63:3:41:468:459
+TTTTTTTTTTTTTTTTTTTCTTTTTTTTTTTTTTT
+>EAS56_63:3:93:1002:845
+AATTCCCAATTACGTCCTATCTTCTTCTTAGGGAA
+>EAS56_63:3:93:1002:845
+GGGCAATACATGAGATTATTAGGAAATGCTTTACT
+>EAS56_63:4:141:9:811
+TTTCTTTTCTCCTTTTTTTTTTTTTTTTTCTACAT
+>EAS56_63:4:184:659:377
+AAAAAGATGTTCTACGCAAACAGAAACCAAATGAG
+>EAS56_63:4:184:659:377
+CAAAACTACCCAGATTCATAAAACAAATACTACTA
+>EAS56_63:4:38:28:122
+AAATATAGTTGAAAGCTCTAACAATAGACTAAACC
+>EAS56_63:4:38:28:122
+GTATTCCTGAGGAAAAAGAAAAAGTGAGAAGTTTG
+>EAS56_63:5:117:570:971
+ACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAAG
+>EAS56_63:5:117:570:971
+ACATGAAGGAAAAAAATTCTAAAATCAGCAAGAGA
+>EAS56_63:5:123:998:248
+ATCACAATGAACAACAGGAAGAAAAGGTCTTTCAA
+>EAS56_63:5:123:998:248
+TTTCTTTTGGCATTTGCCTTCAGACCCTACACGAA
+>EAS56_63:5:36:678:316
+ATTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAA
+>EAS56_63:5:36:678:316
+TTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTTA
+>EAS56_63:5:96:788:614
+TAGGGGTATAATACCTCTACATGGCTGATTATGAA
+>EAS56_63:5:96:788:614
+TTCCACGTCTCATCTAGGGGAACAGGGAGGTGCAC
+>EAS56_63:6:102:816:260
+AAATTACCTAATTGGTACAATGTACAATATTCTGA
+>EAS56_63:6:102:816:260
+TCAGATAAAGCACACTTTAAATCAACAACAGTAAA
+>EAS56_63:6:42:920:522
+AATTAATATAATTGTGTCCATGTACACACGCTGTT
+>EAS56_63:6:42:920:522
+CTGGTTCTCTTGAAAGCTTGGGCTGTAATGATGCC
+>EAS56_63:6:91:360:585
+AAGATGAAACGCGTAACTGCGCTCTCATTCACTCC
+>EAS56_63:6:91:360:585
+GACATCACAATGAACAACAGGAAGAAAAGGTCTTT
+>EAS56_63:7:109:22:383
+ATGTACAATATTCTGATGATGGTTACACTAAAAGC
+>EAS56_63:7:109:22:383
+CAACAACAGTAAAATAAAACAAAGGAGGTCATCAT
+>EAS56_63:7:137:139:248
+GTCAAACACGAATGTTATGCCCTGCTAAACTAAGC
+>EAS56_63:7:137:139:248
+TATCTAAAGTCAACATGAAGGAAAAAAATTCTAAA
+>EAS56_63:7:166:42:147
+AATGCGCTCCACGCCCAAGCCCTTCTAACAGTTTC
+>EAS56_63:7:166:42:147
+CTGCACATTACTACCCTGCAATTAATATAATTGTG
+>EAS56_63:7:185:213:330
+CAATGTCAGGGAAGGAGCATTTTGTCAGTTACCAA
+>EAS56_63:7:185:213:330
+TCTGGGAAATTCTTCATCCTGGACCCTGAGAGATT
+>EAS56_63:7:190:95:706
+TTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTG
+>EAS56_63:7:190:95:706
+TTGTGTCCATGTACACACGCTGTCCTATGTACTTA
+>EAS56_63:7:34:334:825
+CATTTAAGAAATTACAAAATATAGTTGAAAGCTCT
+>EAS56_63:8:138:186:459
+CATTGCAAGACAGACTTCATCAAGATATGTAGTCA
+>EAS56_63:8:138:186:459
+GCCAGAAGAGATTGGAGCTAATTTTTGGACTTCTT
+>EAS56_63:8:150:508:757
+ATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTGT
+>EAS56_63:8:150:508:757
+CTTGACACCCAACTAATATTTGTCTGAGCAAAACA
+>EAS56_63:8:4:571:820
+AAGAGATATAGATTGGCAGAACAGATTTAAAAACA
+>EAS56_63:8:4:571:820
+CAACAACAGTAAAATAAAACAAAGGAGGTCATCAT
+>EAS56_63:8:62:125:888
+CGGAAACCTTACAAGCCAGAAGAGATTGGATCTAA
+>EAS56_63:8:62:125:888
+TGCCAGGAAGATACATTGCAAGACAGACTTCATCA
+>EAS56_65:1:163:846:223
+CCAGCTCCCTGTCACCCAATGGACCTGTGATATCT
+>EAS56_65:1:163:846:223
+GCCTTTGTTCACATAGACCCCCTTGCAACAACCTT
+>EAS56_65:1:178:305:843
+ATGTTTCTTATCTGCACATTACTACCCTGCAATTA
+>EAS56_65:1:178:305:843
+CCACGTCTCATCTAGGGGAACAGGGAGGTGCACTA
+>EAS56_65:1:23:536:229
+AAAGCATACAGTCATCTATAAAGGAAATCCCATCA
+>EAS56_65:1:23:536:229
+AAGGGGAAATAAAGTCAAGTCTTTCCTGACAAGCA
+>EAS56_65:1:53:272:944
+CAACCCCCTTGCAACAACCTTGCGAACCCCAGGGA
+>EAS56_65:1:53:272:944
+TGCGCTCTCATTCACTCCAGCTCCCTGTCACCCAA
+>EAS56_65:2:224:579:433
+ATAACAATGGGCTTCTCAGCAGAAACCTTACAAGC
+>EAS56_65:2:224:579:433
+TTCATCATCACTAAACCAGTCCTATAAGAAATGCT
+>EAS56_65:2:56:155:49
+ATCCTACTAAATACATATGCACCTAACACAAGACT
+>EAS56_65:2:56:155:49
+ATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCT
+>EAS56_65:3:168:741:680
+AGGGACGCTGAAGAACTTTGATGCCCTCTTCTTCC
+>EAS56_65:3:168:741:680
+TGGTCTGACAGGCTGCAACTGTGAGCCATCACAAT
+>EAS56_65:3:47:64:359
+TTTTTTTTTTTCTCTCCTCTTTTTTTTTTTTTTTT
+>EAS56_65:4:124:367:72
+AGACTACCCAGATTCATAAAACAAATACTACTAGA
+>EAS56_65:4:124:367:72
+CATAGCTAAAACTAAAAAAGCAAAAACAAAAACTA
+>EAS56_65:4:126:966:514
+AGAAGAAGTAGCTATACTTATATCAGATAAAGCAC
+>EAS56_65:4:126:966:514
+TAAAAAGGGATTAAATTCCCCCACTTAAGAGATAT
+>EAS56_65:4:150:94:843
+AAAGGGATTAAATTCCCCCACTTAAGAGATATAGA
+>EAS56_65:4:150:94:843
+CAGATACATCCCACTTTAAATCAACCACAGTAAAA
+>EAS56_65:4:296:78:421
+TCTTTTTTTCTTTTCTCTTTTTTTTTTTTTTTTTT
+>EAS56_65:4:296:78:421
+TGTTGGTGTTCGTTTTTTCTCCTGTTTCTTTTTCT
+>EAS56_65:5:121:380:656
+AATGTGTTTATTACCAGAGGGATGGAGGGAAGAGG
+>EAS56_65:5:121:380:656
+GCCCAGCTCCAGATTGCTTGTGGTCTGACAGGCTG
+>EAS56_65:5:131:742:561
+TCAAAAGGTGATGTGTGTTCTCATCAACCTCATAC
+>EAS56_65:5:131:742:561
+TGCCTTCAGACCCTACACGAATGCGTCTCTACCAC
+>EAS56_65:5:211:84:84
+CTATTTGAGGAAGTAATTGGGGAAAACCTCTTTAG
+>EAS56_65:5:211:84:84
+GCAAGAGAAAAGCATACAGTCATCTATAAAGGAAA
+>EAS56_65:5:262:53:888
+AAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGAT
+>EAS56_65:5:262:53:888
+TTGAGACTACAGAGCAACTAGGTAAAAAATTAACA
+>EAS56_65:5:278:848:765
+GTACACACGCTGTCCTATGTACTTATCATGACTCT
+>EAS56_65:5:278:848:765
+TTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTG
+>EAS56_65:5:299:336:613
+ATCCTACTAAATACATATGCACCTAACACAAGACT
+>EAS56_65:5:299:336:613
+TACTCAATATATCCATGTAACAAATCTGCGCTTGT
+>EAS56_65:5:30:92:753
+TATAATACCTCTACATGGCTGATTATGAAAACAAT
+>EAS56_65:5:30:92:753
+TGGATTCTGGGAAATTCTTCATCCTGGACCCTGAG
+>EAS56_65:5:312:985:871
+ATAAAACACATAGCTAAAACTAAAAAAGCAAAAAC
+>EAS56_65:5:312:985:871
+TAAGAGGGATGAGAAATTACCTAATTGGTACAATG
+>EAS56_65:5:37:611:267
+AATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACA
+>EAS56_65:5:37:611:267
+TATAAAGGAAATCCCATAAGAATAACAATGGGCTT
+>EAS56_65:5:75:637:650
+CTGATTATGAAAACAATGTTCCCCAGATACCATCC
+>EAS56_65:5:75:637:650
+GTCTCATCTAGGGGAACAGGGAGGTGCACTAATGC
+>EAS56_65:6:197:759:975
+AAAAGAATTGTAAAAGTCAAAATTAAAGTTCAATA
+>EAS56_65:6:197:759:975
+AATTTTTGGACTTCTTAAAGAAAAAAAAACCTGTC
+>EAS56_65:6:37:610:260
+CAAGCCCTTCTCACAGTTTCTGCCCCCAGCATGGT
+>EAS56_65:6:37:610:260
+CCAGAGGGAAAGCTTTCAACGCTTCTAGCCATTTC
+>EAS56_65:6:46:173:214
+AACAATGGGCTTCTCAGCAGAAACCTTACAAGCCA
+>EAS56_65:6:46:173:214
+CTAGAGATTTAGACATCTAAATGAAAGAGGCTCAA
+>EAS56_65:6:66:257:524
+ATACATGAGATTATTAGGAAATGCTTTACTGTCAT
+>EAS56_65:6:66:257:524
+GCCTTCAGACCCTACACGAATGCGTCTCTACCACC
+>EAS56_65:6:67:800:450
+TCACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGA
+>EAS56_65:6:67:800:450
+TTACAAAATATAGTTGAAAGCTCTAACAATAGACT
+>EAS56_65:6:82:822:767
+AACAGCTTAGGTATCAATTTGGTGTTCTGTGTAAA
+>EAS56_65:6:82:822:767
+TTAGGAAATGCTTTACTGTCATAACTATGAAGAGA
+>EAS56_65:7:118:775:467
+AACAGTAAAATAAAACAAAGGAGGTCATCATACAA
+>EAS56_65:7:118:775:467
+TGTACAATATTCTGATGATGGTTACACTAAAAGCC
+>EAS56_65:7:122:398:994
+GGGATTAAATTCCCCCACTTAAGAGATATAGATTG
+>EAS56_65:7:122:398:994
+TAAAAGTCAAAATTAAAGTTCAATACTCACCATCA
+>EAS56_65:7:219:40:833
+CCCATACTTTACTGCTACTCAATATATCCATGTAA
+>EAS56_65:7:219:40:833
+GGAGGTCATCATACAATGATAAAAAGATCAATTCA
+>EAS56_65:7:288:552:440
+AGAGGGAACGCTTTCAACTCTTCTAGCCATTTCTT
+>EAS56_65:7:288:552:440
+TGTGGTCTGACAGGCTGCAACTGTGAGCCTTCCAT
+>EAS56_65:7:67:692:110
+ATTGCCAGATGAACCACACATTAATACTATGTTTC
+>EAS56_65:7:67:692:110
+GTATCAATTTGGTGTTCTGTGTAAAGTCTCAGGGA
+>EAS56_65:8:117:156:84
+GGTTCAGAACTTGAAGACAAGTCTCTTATGAATTA
+>EAS56_65:8:117:156:84
+TGGGGAAAACCTCTTTAGTCTTGCTAGAGATTTAG
+>EAS56_65:8:206:563:262
+ATTACGTCCTATCTTCTTCTTAGGGAAGAACAGCT
+>EAS56_65:8:206:563:262
+ATTAGGAAATGCTTTACTGTCATAACTATGAAGAG
+>EAS56_65:8:218:173:667
+CCTGCCCCATCTCTTGTAATCTCTCTCCTTTTTGC
+>EAS56_65:8:218:173:667
+TAATGATTCCTCAATGTTAAAATGTCTATTTTTGT
+>EAS56_65:8:24:415:944
+GTTCAGGTAAAGGGGTGGAAAAAGATGTTCTACGC
+>EAS56_65:8:24:415:944
+TAGGTAAAAAATTAACATTACAACAGGAACAAAAC
+>EAS56_65:8:275:851:240
+CCCCAGAGGGAAAGCTTTCAACGTTTCTAGCCATT
+>EAS56_65:8:275:851:240
+GTGGTCTGACAGGCTGCAACTGTGAGCCATCACAA
+>EAS56_65:8:317:83:500
+TTTTTTTTTTTTCTTTTCTCCTTTTTTTTTTGTTT
+>EAS56_65:8:64:507:478
+TAATTGAAAAATTCATTTAAGAAATTACAAAATAT
diff --git a/tests/pysam_data/ex1.fq b/tests/pysam_data/faidx_ex1.fq
similarity index 100%
rename from tests/pysam_data/ex1.fq
rename to tests/pysam_data/faidx_ex1.fq
diff --git a/tests/samtools_test.py b/tests/samtools_test.py
index f247373..f48d23e 100644
--- a/tests/samtools_test.py
+++ b/tests/samtools_test.py
@@ -103,9 +103,9 @@ class BinaryTest(unittest.TestCase):
),
"fixmate":
(
- ("ex1.fixmate", "fixmate ex1.bam ex1.fixmate"),
- ("pysam_ex1.fixmate",
- (pysam.fixmate, "pysam_ex1.bam pysam_ex1.fixmate")),
+ ("ex1.fixmate.bam", "fixmate ex1.bam ex1.fixmate.bam"),
+ ("pysam_ex1.fixmate.bam",
+ (pysam.fixmate, "pysam_ex1.bam pysam_ex1.fixmate.bam")),
),
"flagstat":
(
@@ -114,22 +114,24 @@ class BinaryTest(unittest.TestCase):
),
"calmd":
(
- ("ex1.calmd", "calmd ex1.bam ex1.fa > ex1.calmd"),
- ("pysam_ex1.calmd", (pysam.calmd, "pysam_ex1.bam ex1.fa")),
+ ("ex1.calmd.bam", "calmd ex1.bam ex1.fa > ex1.calmd.bam"),
+ ("pysam_ex1.calmd.bam", (pysam.calmd, "pysam_ex1.bam ex1.fa")),
),
"merge":
(
("ex1.merge", "merge -f ex1.merge ex1.bam ex1.bam"),
- # -f option does not work - following command will cause the subsequent
- # command to fail
+ # -f option does not work - following command will
+ # cause the subsequent command to fail
("pysam_ex1.merge",
(pysam.merge, "pysam_ex1.merge pysam_ex1.bam pysam_ex1.bam")),
),
"rmdup":
(
- ("ex1.rmdup", "rmdup ex1.bam ex1.rmdup"),
- ("pysam_ex1.rmdup",
- (pysam.rmdup, "pysam_ex1.bam pysam_ex1.rmdup")),
+ # use -s option, otherwise the following error in samtools 1.2:
+ # Samtools-htslib-API: bam_get_library() not yet implemented
+ ("ex1.rmdup.bam", "rmdup -s ex1.bam ex1.rmdup.bam"),
+ ("pysam_ex1.rmdup.bam",
+ (pysam.rmdup, "pysam_ex1.bam -s pysam_ex1.rmdup.bam")),
),
"reheader":
(
@@ -138,8 +140,9 @@ class BinaryTest(unittest.TestCase):
),
"cat":
(
- ("ex1.cat", "cat ex1.bam ex1.bam > ex1.cat"),
- ("pysam_ex1.cat", (pysam.cat, "ex1.bam ex1.bam")),
+ ("ex1.cat.bam", "cat -o ex1.cat.bam ex1.bam ex1.bam"),
+ ("pysam_ex1.cat.bam",
+ (pysam.cat, " -o pysam_ex1.cat.bam ex1.bam ex1.bam")),
),
"targetcut":
(
@@ -184,26 +187,29 @@ class BinaryTest(unittest.TestCase):
# the samtools commands are executed.
# The first three (faidx, import, index) need to be in that order,
# the rest is arbitrary.
- order = ('faidx', 'import', 'index',
- # 'pileup1', 'pileup2', deprecated
- # 'glfview', deprecated
- 'view', 'view2',
+ order = ('faidx',
+ 'import',
+ 'index',
+ 'view',
+ 'view2',
'sort',
'mpileup',
'depth',
'idxstats',
- # 'fixmate',
+ 'fixmate',
'flagstat',
- # 'calmd',
+ 'calmd',
'merge',
- # 'rmdup',
+ 'rmdup',
'reheader',
'cat',
'bedcov',
'targetcut',
'phase',
- # 'bamshuf',
'bam2fq',
+ # Segmentation fault:
+ # 'bamshuf',
+ # File not binary identical
# 'pad2unpad',
)
@@ -213,6 +219,7 @@ class BinaryTest(unittest.TestCase):
For setup, all commands will be run before the first test is
executed. Individual tests will then just compare the output
files.
+
'''
if BinaryTest.first_time:
@@ -234,12 +241,12 @@ class BinaryTest(unittest.TestCase):
savedir = os.getcwd()
os.chdir(WORKDIR)
for label in self.order:
- # print ("command=", label)
+ sys.stdout.write("preparing test {}".format(label))
command = self.commands[label]
# build samtools command and target and run
samtools_target, samtools_command = command[0]
runSamtools(" ".join((SAMTOOLS, samtools_command)))
-
+ sys.stdout.write(" samtools ok")
# get pysam command and run
try:
pysam_target, pysam_command = command[1]
@@ -248,14 +255,18 @@ class BinaryTest(unittest.TestCase):
(label, command, msg))
pysam_method, pysam_options = pysam_command
-
+
try:
- output = pysam_method(*pysam_options.split(" "), raw=True)
+ output = pysam_method(*pysam_options.split(" "),
+ raw=True,
+ catch_stdout=True)
except pysam.SamtoolsError as msg:
raise pysam.SamtoolsError(
"error while executing %s: options=%s: msg=%s" %
(label, pysam_options, msg))
+ sys.stdout.write(" pysam ok\n")
+
if ">" in samtools_command:
with open(pysam_target, "wb") as outfile:
if type(output) == list:
@@ -309,14 +320,17 @@ class BinaryTest(unittest.TestCase):
def testMpileup(self):
self.checkCommand("mpileup")
+ def testCalmd(self):
+ self.checkCommand("calmd")
+
def testDepth(self):
self.checkCommand("depth")
def testIdxstats(self):
self.checkCommand("idxstats")
- # def testFixmate(self):
- # self.checkCommand("fixmate")
+ def testFixmate(self):
+ self.checkCommand("fixmate")
def testFlagstat(self):
self.checkCommand("flagstat")
@@ -324,8 +338,8 @@ class BinaryTest(unittest.TestCase):
def testMerge(self):
self.checkCommand("merge")
- # def testRmdup(self):
- # self.checkCommand("rmdup")
+ def testRmdup(self):
+ self.checkCommand("rmdup")
def testReheader(self):
self.checkCommand("reheader")
@@ -345,32 +359,21 @@ class BinaryTest(unittest.TestCase):
def testBedcov(self):
self.checkCommand("bedcov")
+ def testView(self):
+ self.checkCommand("view")
+
# def testBamshuf(self):
- # self.checkCommand("bamshuf")
+ # self.checkCommand("bamshuf")
# def testPad2Unpad(self):
- # self.checkCommand("pad2unpad")
-
- # def testPileup1( self ):
- # self.checkCommand( "pileup1" )
-
- # def testPileup2( self ):
- # self.checkCommand( "pileup2" )
-
- # deprecated
- # def testGLFView( self ):
- # self.checkCommand( "glfview" )
-
- def testView(self):
- self.checkCommand("view")
+ # self.checkCommand("pad2unpad")
def testEmptyIndex(self):
self.assertRaises(IOError, pysam.index, "exdoesntexist.bam")
def __del__(self):
if os.path.exists(WORKDIR):
- pass
- # shutil.rmtree( WORKDIR )
+ shutil.rmtree(WORKDIR)
class StdoutTest(unittest.TestCase):
diff --git a/tests/tabix_test.py b/tests/tabix_test.py
index 1ad48ba..961f89a 100644
--- a/tests/tabix_test.py
+++ b/tests/tabix_test.py
@@ -13,6 +13,8 @@ import pysam
import unittest
import glob
import re
+import copy
+from TestUtils import checkURL
DATADIR = 'tabix_data'
@@ -493,6 +495,16 @@ class TestParser(unittest.TestCase):
os.unlink(tmpfilename)
+ def testCopy(self):
+ a = self.tabix.fetch(parser=pysam.asTuple()).next()
+ b = copy.copy(a)
+ self.assertEqual(a, b)
+
+ a = self.tabix.fetch(parser=pysam.asGTF()).next()
+ b = copy.copy(a)
+ self.assertEqual(a, b)
+
+
class TestIterators(unittest.TestCase):
@@ -924,8 +936,6 @@ for vcf_file in vcf_files:
n = "VCFFromVCFTest_%s" % os.path.basename(vcf_file[:-4])
globals()[n] = type(n, (TestVCFFromVCF,), dict(filename=vcf_file,))
-############################################################################
-
class TestRemoteFileHTTP(unittest.TestCase):
@@ -933,16 +943,28 @@ class TestRemoteFileHTTP(unittest.TestCase):
region = "chr1:1-1000"
local = os.path.join(DATADIR, "example.gtf.gz")
+ def setUp(self):
+ self.remote_file = pysam.TabixFile(self.url, "r")
+ self.local_file = pysam.TabixFile(self.local, "r")
+
def testFetchAll(self):
- remote_file = pysam.TabixFile(self.url, "r")
- remote_result = list(remote_file.fetch())
- local_file = pysam.TabixFile(self.local, "r")
- local_result = list(local_file.fetch())
+ if not checkURL(self.url):
+ return
+
+ remote_result = list(self.remote_file.fetch())
+ local_result = list(self.local_file.fetch())
self.assertEqual(len(remote_result), len(local_result))
for x, y in zip(remote_result, local_result):
self.assertEqual(x, y)
+ def testHeader(self):
+ self.assertEqual(list(self.local_file.header), [])
+ self.assertRaises(AttributeError,
+ getattr,
+ self.remote_file,
+ "header")
+
class TestIndexArgument(unittest.TestCase):
@@ -1019,5 +1041,17 @@ class TestMultipleIterators(unittest.TestCase):
self.assertEqual(str(a), str(b))
+class TestContextManager(unittest.TestCase):
+
+ filename = os.path.join(DATADIR, "example.gtf.gz")
+
+ def testManager(self):
+
+ with pysam.TabixFile(self.filename) as tabixfile:
+ tabixfile.fetch()
+ self.assertEqual(tabixfile.closed, True)
+
+
+
if __name__ == "__main__":
unittest.main()
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/python-pysam.git
More information about the debian-med-commit
mailing list