[med-svn] [Git][med-team/python-pyfaidx][upstream] New upstream version 0.5.4

Tue May 29 08:11:22 BST 2018

Andreas Tille pushed to branch upstream at Debian Med / python-pyfaidx


Commits:
d64af6ce by Andreas Tille at 2018-05-29T08:55:52+02:00
New upstream version 0.5.4
- - - - -


4 changed files:

- .travis.yml
- README.rst
- pyfaidx/__init__.py
- tests/test_FastaRecord.py


Changes:

=====================================
.travis.yml
=====================================

--- a/.travis.yml
+++ b/.travis.yml
@@ -11,8 +11,8 @@ python:
     - 'pypy'
     - 'pypy3'
 install:
-    - pip wheel -f wheelhouse coverage biopython cython pysam pyvcf || true
-    - pip install -f wheelhouse biopython cython pysam pyfasta coverage pyvcf || true
+    - pip wheel -f wheelhouse coverage biopython cython pysam pyvcf numpy || true
+    - pip install -f wheelhouse biopython cython pysam pyfasta coverage pyvcf numpy || true
     - python setup.py install
     - if [ ! -f samtools-1.2 ]; then curl -sL https://github.com/samtools/samtools/releases/download/1.2/samtools-1.2.tar.bz2 | tar -xjv; fi
     - cd samtools-1.2


=====================================
README.rst
=====================================
--- a/README.rst
+++ b/README.rst
@@ -110,7 +110,7 @@ Slices just like a string:
     >NM_001282543.1:1-5466
     CCCCGCCCCT........
 
-- Slicing start and end coordinates are 0-based, just like Python sequences.  
+- Slicing start and end coordinates are 0-based, just like Python sequences.
 
 Complements and reverse complements just like DNA
 
@@ -127,7 +127,7 @@ Complements and reverse complements just like DNA
     >>> -genes['NM_001282543.1'][200:230]
     >NM_001282543.1 (complement):230-201
     CATCCGGTTCCATGGCGGGCGCGGAACGAG
-    
+
 ``Fasta`` objects can also be accessed using method calls:
 
 .. code:: python
@@ -135,16 +135,16 @@ Complements and reverse complements just like DNA
     >>> genes.get_seq('NM_001282543.1', 201, 210)
     >NM_001282543.1:201-210
     CTCGTTCCGC
-    
+
     >>> genes.get_seq('NM_001282543.1', 201, 210, rc=True)
     >NM_001282543.1 (complement):210-201
     GCGGAACGAG
-    
+
 Spliced sequences can be retrieved from a list of [start, end] coordinates:
 **TODO** update this section
 
 .. code:: python
-    
+
     # new in v0.5.1
     segments = [[1, 10], [50, 70]]
     >>> genes.get_spliced_seq('NM_001282543.1', segments)
@@ -261,6 +261,17 @@ Sequence names are truncated on any whitespace. This is a limitation of the inde
     gi|557361099|gb|KF435150.1| Homo sapiens MDM4 protein variant Y (MDM4) mRNA, complete cds, alternatively spliced
     gi|557361097|gb|KF435149.1| Homo sapiens MDM4 protein variant G (MDM4) mRNA, complete cds
 
+Records can be accessed efficiently as numpy arrays:
+
+.. code:: python
+
+    # new in v0.5.4
+    >>> from pyfaidx import Fasta
+    >>> import numpy as np
+    >>> genes = Fasta('tests/data/genes.fasta')
+    >>> np.asarray(genes['NM_001282543.1'])
+    array(['C', 'C', 'C', ..., 'A', 'A', 'A'], dtype='|S1')
+
 Sequence can be buffered in memory using a read-ahead buffer
 for fast sequential access:
 


=====================================
pyfaidx/__init__.py
=====================================
--- a/pyfaidx/__init__.py
+++ b/pyfaidx/__init__.py
@@ -5,6 +5,7 @@ Fasta file -> Faidx -> Fasta -> FastaRecord -> Sequence
 
 from __future__ import division
 import os
+import sys
 from os.path import getmtime
 from six import PY2, PY3, string_types, integer_types
 from six.moves import zip_longest
@@ -19,9 +20,12 @@ import warnings
 from math import ceil
 from threading import Lock
 
+if sys.version_info > (3, ):
+    buffer = memoryview
+
 dna_bases = re.compile(r'([ACTGNactgnYRWSKMDVHBXyrwskmdvhbx]+)')
 
-__version__ = '0.5.3.1'
+__version__ = '0.5.4'
 
 
 class KeyFunctionError(ValueError):
@@ -762,6 +766,7 @@ class Faidx(object):
 
 class FastaRecord(object):
     __slots__ = ['name', '_fa']
+
     def __init__(self, name, fa):
         self.name = name
         self._fa = fa
@@ -887,6 +892,16 @@ class FastaRecord(object):
         """ Read the actual defline from self._fa.faidx mdshw5/pyfaidx#54 """
         return self._fa.faidx.get_long_name(self.name)
 
+    @property
+    def __array_interface__(self):
+        """ Implement numpy array interface for issue #139"""
+        return {
+            'shape': (len(self), ),
+            'typestr': '|S1',
+            'version': 3,
+            'data': buffer(str(self).encode('ascii'))
+        }
+
 
 class MutableFastaRecord(FastaRecord):
     def __init__(self, name, fa):


=====================================
tests/test_FastaRecord.py
=====================================
--- a/tests/test_FastaRecord.py
+++ b/tests/test_FastaRecord.py
@@ -9,6 +9,7 @@ from difflib import Differ
 path = os.path.dirname(__file__)
 os.chdir(path)
 
+
 class TestFastaRecord(TestCase):
     def setUp(self):
         pass
@@ -28,8 +29,9 @@ class TestFastaRecord(TestCase):
         reference_upper = Fasta(filename, sequence_always_upper=True)
         reference_normal = Fasta(filename)
         os.remove('data/genes.fasta.lower.fai')
-        assert reference_upper['gi|557361099|gb|KF435150.1|'][1:100].seq == reference_normal['gi|557361099|gb|KF435150.1|'][1:100].seq.upper()
-
+        assert reference_upper['gi|557361099|gb|KF435150.1|'][
+            1:100].seq == reference_normal['gi|557361099|gb|KF435150.1|'][
+                1:100].seq.upper()
 
     def test_long_names(self):
         """ Test that deflines extracted using FastaRecord.long_name are
@@ -60,7 +62,8 @@ class TestFastaRecord(TestCase):
                         line_len = len(line)
                         fasta_uniform_len.write(line)
                     elif line_len > len(line):
-                        fasta_uniform_len.write(line.rstrip() + b'N' * (line_len - len(line)) + b'\n')
+                        fasta_uniform_len.write(line.rstrip() + b'N' *
+                                                (line_len - len(line)) + b'\n')
                     else:
                         fasta_uniform_len.write(line)
         fasta = Fasta('data/issue_62.fa', as_raw=True)
@@ -93,11 +96,19 @@ class TestFastaRecord(TestCase):
         fasta = Fasta(filename)
         expect = 20
         result = fasta["test_padded"].unpadded_len
-        print (expect, result)
+        print(expect, result)
         assert expect == result
         os.remove('data/padded.fasta')
         os.remove('data/padded.fasta.fai')
 
+    def test_numpy_array(self):
+        """ Test the __array_interface__ """
+        import numpy
+        filename = "data/genes.fasta.lower"
+        reference = Fasta(filename)
+        np_array = numpy.asarray(reference[0])
+        assert isinstance(np_array, numpy.ndarray)
+
 
 class TestMutableFastaRecord(TestCase):
     def setUp(self):
@@ -124,7 +135,8 @@ class TestMutableFastaRecord(TestCase):
         fasta = Fasta('data/genes.fasta', mutable=False)
         chunk = fasta['gi|557361099|gb|KF435150.1|'][0:100]
         mutable['gi|557361099|gb|KF435150.1|'][0:100] = chunk.seq
-        assert str(fasta['gi|557361099|gb|KF435150.1|']) == str(mutable['gi|557361099|gb|KF435150.1|'])
+        assert str(fasta['gi|557361099|gb|KF435150.1|']) == str(
+            mutable['gi|557361099|gb|KF435150.1|'])
 
     def test_mutate_fasta_to_N(self):
         mutable = Fasta('data/genes_mutable.fasta', mutable=True)



View it on GitLab: https://salsa.debian.org/med-team/python-pyfaidx/commit/d64af6ce685b3e4cebc3d79a084317efd92af4bd

-- 
View it on GitLab: https://salsa.debian.org/med-team/python-pyfaidx/commit/d64af6ce685b3e4cebc3d79a084317efd92af4bd
You're receiving this email because of your account on salsa.debian.org.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://alioth-lists.debian.net/pipermail/debian-med-commit/attachments/20180529/c1ce577d/attachment-0001.html>