[med-svn] [Git][med-team/python-pyfaidx][upstream] New upstream version 0.5.4
Andreas Tille
gitlab at salsa.debian.org
Tue May 29 08:11:22 BST 2018
Andreas Tille pushed to branch upstream at Debian Med / python-pyfaidx
Commits:
d64af6ce by Andreas Tille at 2018-05-29T08:55:52+02:00
New upstream version 0.5.4
- - - - -
4 changed files:
- .travis.yml
- README.rst
- pyfaidx/__init__.py
- tests/test_FastaRecord.py
Changes:
=====================================
.travis.yml
=====================================
--- a/.travis.yml
+++ b/.travis.yml
@@ -11,8 +11,8 @@ python:
- 'pypy'
- 'pypy3'
install:
- - pip wheel -f wheelhouse coverage biopython cython pysam pyvcf || true
- - pip install -f wheelhouse biopython cython pysam pyfasta coverage pyvcf || true
+ - pip wheel -f wheelhouse coverage biopython cython pysam pyvcf numpy || true
+ - pip install -f wheelhouse biopython cython pysam pyfasta coverage pyvcf numpy || true
- python setup.py install
- if [ ! -f samtools-1.2 ]; then curl -sL https://github.com/samtools/samtools/releases/download/1.2/samtools-1.2.tar.bz2 | tar -xjv; fi
- cd samtools-1.2
=====================================
README.rst
=====================================
--- a/README.rst
+++ b/README.rst
@@ -110,7 +110,7 @@ Slices just like a string:
>NM_001282543.1:1-5466
CCCCGCCCCT........
-- Slicing start and end coordinates are 0-based, just like Python sequences.
+- Slicing start and end coordinates are 0-based, just like Python sequences.
Complements and reverse complements just like DNA
@@ -127,7 +127,7 @@ Complements and reverse complements just like DNA
>>> -genes['NM_001282543.1'][200:230]
>NM_001282543.1 (complement):230-201
CATCCGGTTCCATGGCGGGCGCGGAACGAG
-
+
``Fasta`` objects can also be accessed using method calls:
.. code:: python
@@ -135,16 +135,16 @@ Complements and reverse complements just like DNA
>>> genes.get_seq('NM_001282543.1', 201, 210)
>NM_001282543.1:201-210
CTCGTTCCGC
-
+
>>> genes.get_seq('NM_001282543.1', 201, 210, rc=True)
>NM_001282543.1 (complement):210-201
GCGGAACGAG
-
+
Spliced sequences can be retrieved from a list of [start, end] coordinates:
**TODO** update this section
.. code:: python
-
+
# new in v0.5.1
segments = [[1, 10], [50, 70]]
>>> genes.get_spliced_seq('NM_001282543.1', segments)
@@ -261,6 +261,17 @@ Sequence names are truncated on any whitespace. This is a limitation of the inde
gi|557361099|gb|KF435150.1| Homo sapiens MDM4 protein variant Y (MDM4) mRNA, complete cds, alternatively spliced
gi|557361097|gb|KF435149.1| Homo sapiens MDM4 protein variant G (MDM4) mRNA, complete cds
+Records can be accessed efficiently as numpy arrays:
+
+.. code:: python
+
+ # new in v0.5.4
+ >>> from pyfaidx import Fasta
+ >>> import numpy as np
+ >>> genes = Fasta('tests/data/genes.fasta')
+ >>> np.asarray(genes['NM_001282543.1'])
+ array(['C', 'C', 'C', ..., 'A', 'A', 'A'], dtype='|S1')
+
Sequence can be buffered in memory using a read-ahead buffer
for fast sequential access:
=====================================
pyfaidx/__init__.py
=====================================
--- a/pyfaidx/__init__.py
+++ b/pyfaidx/__init__.py
@@ -5,6 +5,7 @@ Fasta file -> Faidx -> Fasta -> FastaRecord -> Sequence
from __future__ import division
import os
+import sys
from os.path import getmtime
from six import PY2, PY3, string_types, integer_types
from six.moves import zip_longest
@@ -19,9 +20,12 @@ import warnings
from math import ceil
from threading import Lock
+if sys.version_info > (3, ):
+ buffer = memoryview
+
dna_bases = re.compile(r'([ACTGNactgnYRWSKMDVHBXyrwskmdvhbx]+)')
-__version__ = '0.5.3.1'
+__version__ = '0.5.4'
class KeyFunctionError(ValueError):
@@ -762,6 +766,7 @@ class Faidx(object):
class FastaRecord(object):
__slots__ = ['name', '_fa']
+
def __init__(self, name, fa):
self.name = name
self._fa = fa
@@ -887,6 +892,16 @@ class FastaRecord(object):
""" Read the actual defline from self._fa.faidx mdshw5/pyfaidx#54 """
return self._fa.faidx.get_long_name(self.name)
+ @property
+ def __array_interface__(self):
+ """ Implement numpy array interface for issue #139"""
+ return {
+ 'shape': (len(self), ),
+ 'typestr': '|S1',
+ 'version': 3,
+ 'data': buffer(str(self).encode('ascii'))
+ }
+
class MutableFastaRecord(FastaRecord):
def __init__(self, name, fa):
=====================================
tests/test_FastaRecord.py
=====================================
--- a/tests/test_FastaRecord.py
+++ b/tests/test_FastaRecord.py
@@ -9,6 +9,7 @@ from difflib import Differ
path = os.path.dirname(__file__)
os.chdir(path)
+
class TestFastaRecord(TestCase):
def setUp(self):
pass
@@ -28,8 +29,9 @@ class TestFastaRecord(TestCase):
reference_upper = Fasta(filename, sequence_always_upper=True)
reference_normal = Fasta(filename)
os.remove('data/genes.fasta.lower.fai')
- assert reference_upper['gi|557361099|gb|KF435150.1|'][1:100].seq == reference_normal['gi|557361099|gb|KF435150.1|'][1:100].seq.upper()
-
+ assert reference_upper['gi|557361099|gb|KF435150.1|'][
+ 1:100].seq == reference_normal['gi|557361099|gb|KF435150.1|'][
+ 1:100].seq.upper()
def test_long_names(self):
""" Test that deflines extracted using FastaRecord.long_name are
@@ -60,7 +62,8 @@ class TestFastaRecord(TestCase):
line_len = len(line)
fasta_uniform_len.write(line)
elif line_len > len(line):
- fasta_uniform_len.write(line.rstrip() + b'N' * (line_len - len(line)) + b'\n')
+ fasta_uniform_len.write(line.rstrip() + b'N' *
+ (line_len - len(line)) + b'\n')
else:
fasta_uniform_len.write(line)
fasta = Fasta('data/issue_62.fa', as_raw=True)
@@ -93,11 +96,19 @@ class TestFastaRecord(TestCase):
fasta = Fasta(filename)
expect = 20
result = fasta["test_padded"].unpadded_len
- print (expect, result)
+ print(expect, result)
assert expect == result
os.remove('data/padded.fasta')
os.remove('data/padded.fasta.fai')
+ def test_numpy_array(self):
+ """ Test the __array_interface__ """
+ import numpy
+ filename = "data/genes.fasta.lower"
+ reference = Fasta(filename)
+ np_array = numpy.asarray(reference[0])
+ assert isinstance(np_array, numpy.ndarray)
+
class TestMutableFastaRecord(TestCase):
def setUp(self):
@@ -124,7 +135,8 @@ class TestMutableFastaRecord(TestCase):
fasta = Fasta('data/genes.fasta', mutable=False)
chunk = fasta['gi|557361099|gb|KF435150.1|'][0:100]
mutable['gi|557361099|gb|KF435150.1|'][0:100] = chunk.seq
- assert str(fasta['gi|557361099|gb|KF435150.1|']) == str(mutable['gi|557361099|gb|KF435150.1|'])
+ assert str(fasta['gi|557361099|gb|KF435150.1|']) == str(
+ mutable['gi|557361099|gb|KF435150.1|'])
def test_mutate_fasta_to_N(self):
mutable = Fasta('data/genes_mutable.fasta', mutable=True)
View it on GitLab: https://salsa.debian.org/med-team/python-pyfaidx/commit/d64af6ce685b3e4cebc3d79a084317efd92af4bd
--
View it on GitLab: https://salsa.debian.org/med-team/python-pyfaidx/commit/d64af6ce685b3e4cebc3d79a084317efd92af4bd
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20180529/c1ce577d/attachment-0001.html>
More information about the debian-med-commit mailing list