[med-svn] [fastaq] 02/04: Imported Upstream version 3.12.0

Sascha Steinbiss sascha at steinbiss.name
Thu Apr 14 16:35:27 UTC 2016


This is an automated email from the git hooks/post-receive script.

sascha-guest pushed a commit to branch master
in repository fastaq.

commit 0bd3dc285daaa6028ad33363f33b35c69aa64e9f
Author: Sascha Steinbiss <sascha at steinbiss.name>
Date:   Thu Apr 14 16:29:17 2016 +0000

    Imported Upstream version 3.12.0
---
 pyfastaq/common.py               |  2 +-
 pyfastaq/sequences.py            | 34 +++++++++++++++++
 pyfastaq/tests/sequences_test.py | 82 ++++++++++++++++++++++++++++++++++++++++
 setup.py                         |  2 +-
 4 files changed, 118 insertions(+), 2 deletions(-)

diff --git a/pyfastaq/common.py b/pyfastaq/common.py
index e27854a..1c5007f 100644
--- a/pyfastaq/common.py
+++ b/pyfastaq/common.py
@@ -1 +1 @@
-version = '3.11.1'
+version = '3.12.0'
diff --git a/pyfastaq/sequences.py b/pyfastaq/sequences.py
index f757719..55dc7e5 100644
--- a/pyfastaq/sequences.py
+++ b/pyfastaq/sequences.py
@@ -1,3 +1,4 @@
+import copy
 import re
 import string
 import random
@@ -350,6 +351,25 @@ class Fasta:
           and self.seq[0:3].upper() in genetic_codes.starts[genetic_code]
 
 
+    def make_into_gene(self):
+        '''Tries to make into a gene sequence. Tries all three reading frames and both strands. Returns a tuple (new sequence, strand, frame) if it was successful. Otherwise returns None.'''
+        for reverse in [True, False]:
+            for frame in range(3):
+                new_seq = copy.copy(self)
+                if reverse:
+                    new_seq.revcomp()
+                new_seq.seq = new_seq[frame:]
+                if len(new_seq) % 3:
+                    new_seq.seq = new_seq.seq[:-(len(new_seq) % 3)]
+
+                new_aa_seq = new_seq.translate()
+                if len(new_aa_seq) >= 2 and new_seq[0:3] in genetic_codes.starts[genetic_code] and new_aa_seq[-1] == '*' and '*' not in new_aa_seq[:-1]:
+                    strand = '-' if reverse else '+'
+                    return new_seq, strand, frame
+
+        return None
+
+
     # Fills the object with the next sequence in the file. Returns
     # True if this was successful, False if no more sequences in the file.
     # If reading a file of quality scores, set read_quals = True
@@ -594,6 +614,20 @@ class Fastq(Fasta):
         fa = super().translate()
         return Fastq(fa.id, fa.seq, 'I'*len(fa.seq))
 
+    def make_into_gene(self):
+        got = super().make_into_gene()
+        if got is None:
+            return None
+        seq, strand, frame = got
+        new_seq = copy.copy(self)
+
+        if strand == '-':
+            new_seq.revcomp()
+
+        new_seq.seq = new_seq.seq[frame:frame + len(seq)]
+        new_seq.qual = new_seq.qual[frame:frame + len(seq)]
+        return new_seq, strand, frame
+
 
 def _orfs_from_aa_seq(seq):
     orfs = []
diff --git a/pyfastaq/tests/sequences_test.py b/pyfastaq/tests/sequences_test.py
index 8663422..2cc2396 100644
--- a/pyfastaq/tests/sequences_test.py
+++ b/pyfastaq/tests/sequences_test.py
@@ -265,7 +265,89 @@ class TestFasta(unittest.TestCase):
         self.assertFalse(sequences.Fasta('ID', 'ATTCAGTAA').looks_like_gene())
         sequences.genetic_code = 11
         self.assertTrue(sequences.Fasta('ID', 'ATTCAGTAA').looks_like_gene())
+        sequences.genetic_code = 1
+
+
+    def test_make_into_gene_fasta(self):
+        '''Test make_into_gene fasta'''
+        print('sequences.genetic_code', sequences.genetic_code)
+        tests = [
+            (sequences.Fasta('ID', 'T'), None),
+            (sequences.Fasta('ID', 'TT'), None),
+            (sequences.Fasta('ID', 'TTT'), None),
+            (sequences.Fasta('ID', 'TTG'), None),
+            (sequences.Fasta('ID', 'TAA'), None),
+            (sequences.Fasta('ID', 'TTGAAATAA'), (sequences.Fasta('ID', 'TTGAAATAA'), '+', 0)),
+            (sequences.Fasta('ID', 'TTGAAATAT'), None),
+            (sequences.Fasta('ID', 'TTGTAA'), (sequences.Fasta('ID', 'TTGTAA'), '+', 0)),
+            (sequences.Fasta('ID', 'TTGTAAA'), (sequences.Fasta('ID', 'TTGTAA'), '+', 0)),
+            (sequences.Fasta('ID', 'TTGTAAAA'), (sequences.Fasta('ID', 'TTGTAA'), '+', 0)),
+            (sequences.Fasta('ID', 'TTGTAAAAA'), None),
+            (sequences.Fasta('ID', 'ATTGTAA'), (sequences.Fasta('ID', 'TTGTAA'), '+', 1)),
+            (sequences.Fasta('ID', 'ATTGTAAA'), (sequences.Fasta('ID', 'TTGTAA'), '+', 1)),
+            (sequences.Fasta('ID', 'ATTGTAAAA'), (sequences.Fasta('ID', 'TTGTAA'), '+', 1)),
+            (sequences.Fasta('ID', 'ATTGTAAAAA'), None),
+            (sequences.Fasta('ID', 'AATTGTAA'), (sequences.Fasta('ID', 'TTGTAA'), '+', 2)),
+            (sequences.Fasta('ID', 'AATTGTAAA'), (sequences.Fasta('ID', 'TTGTAA'), '+', 2)),
+            (sequences.Fasta('ID', 'AATTGTAAAA'), (sequences.Fasta('ID', 'TTGTAA'), '+', 2)),
+            (sequences.Fasta('ID', 'AATTGTAAAAA'), None),
+            (sequences.Fasta('ID', 'TTACAA'), (sequences.Fasta('ID', 'TTGTAA'), '-', 0)),
+            (sequences.Fasta('ID', 'ATTACAA'), (sequences.Fasta('ID', 'TTGTAA'), '-', 0)),
+            (sequences.Fasta('ID', 'AATTACAA'), (sequences.Fasta('ID', 'TTGTAA'), '-', 0)),
+            (sequences.Fasta('ID', 'AAATTACAA'), None),
+            (sequences.Fasta('ID', 'TTACAAA'), (sequences.Fasta('ID', 'TTGTAA'), '-', 1)),
+            (sequences.Fasta('ID', 'ATTACAAA'), (sequences.Fasta('ID', 'TTGTAA'), '-', 1)),
+            (sequences.Fasta('ID', 'AATTACAAA'), (sequences.Fasta('ID', 'TTGTAA'), '-', 1)),
+            (sequences.Fasta('ID', 'AAATTACAAA'), None),
+            (sequences.Fasta('ID', 'TTACAAAA'), (sequences.Fasta('ID', 'TTGTAA'), '-', 2)),
+            (sequences.Fasta('ID', 'ATTACAAAA'), (sequences.Fasta('ID', 'TTGTAA'), '-', 2)),
+            (sequences.Fasta('ID', 'AATTACAAAA'), (sequences.Fasta('ID', 'TTGTAA'), '-', 2)),
+            (sequences.Fasta('ID', 'AAATTACAAAA'), None),
+        ]
+
+        for seq, expected in tests:
+            self.assertEqual(seq.make_into_gene(), expected)
+
+
+    def test_make_into_gene_fastq(self):
+        '''Test make_into_gene fastq'''
+        print('sequences.genetic_code', sequences.genetic_code)
+        tests = [
+            (sequences.Fastq('ID', 'T', '1'), None),
+            (sequences.Fastq('ID', 'TT', '12'), None),
+            (sequences.Fastq('ID', 'TTT', '123'), None),
+            (sequences.Fastq('ID', 'TTG', '123'), None),
+            (sequences.Fastq('ID', 'TAA', '123'), None),
+            (sequences.Fastq('ID', 'TTGAAATAA', '123456789'), (sequences.Fastq('ID', 'TTGAAATAA', '123456789'), '+', 0)),
+            (sequences.Fastq('ID', 'TTGAAATAT', '123456789'), None),
+            (sequences.Fastq('ID', 'TTGTAA', '123456'), (sequences.Fastq('ID', 'TTGTAA', '123456'), '+', 0)),
+            (sequences.Fastq('ID', 'TTGTAAA', '1234567'), (sequences.Fastq('ID', 'TTGTAA', '123456'), '+', 0)),
+            (sequences.Fastq('ID', 'TTGTAAAA', '12345678'), (sequences.Fastq('ID', 'TTGTAA', '123456'), '+', 0)),
+            (sequences.Fastq('ID', 'TTGTAAAAA', '123456789'), None),
+            (sequences.Fastq('ID', 'ATTGTAA', '1234567'), (sequences.Fastq('ID', 'TTGTAA', '234567'), '+', 1)),
+            (sequences.Fastq('ID', 'ATTGTAAA', '12345678'), (sequences.Fastq('ID', 'TTGTAA', '234567'), '+', 1)),
+            (sequences.Fastq('ID', 'ATTGTAAAA', '123456789'), (sequences.Fastq('ID', 'TTGTAA', '234567'), '+', 1)),
+            (sequences.Fastq('ID', 'ATTGTAAAAA', '123456789A'), None),
+            (sequences.Fastq('ID', 'AATTGTAA', '12345678'), (sequences.Fastq('ID', 'TTGTAA', '345678'), '+', 2)),
+            (sequences.Fastq('ID', 'AATTGTAAA', '123456789'), (sequences.Fastq('ID', 'TTGTAA', '345678'), '+', 2)),
+            (sequences.Fastq('ID', 'AATTGTAAAA', '123456789A'), (sequences.Fastq('ID', 'TTGTAA', '345678'), '+', 2)),
+            (sequences.Fastq('ID', 'AATTGTAAAAA', '123456789AB'), None),
+            (sequences.Fastq('ID', 'TTACAA', '123456'), (sequences.Fastq('ID', 'TTGTAA', '654321'), '-', 0)),
+            (sequences.Fastq('ID', 'ATTACAA', '1234567'), (sequences.Fastq('ID', 'TTGTAA', '765432'), '-', 0)),
+            (sequences.Fastq('ID', 'AATTACAA', '12345678'), (sequences.Fastq('ID', 'TTGTAA', '876543'), '-', 0)),
+            (sequences.Fastq('ID', 'AAATTACAA', '123456789'), None),
+            (sequences.Fastq('ID', 'TTACAAA', '1234567'), (sequences.Fastq('ID', 'TTGTAA', '654321'), '-', 1)),
+            (sequences.Fastq('ID', 'ATTACAAA', '12345678'), (sequences.Fastq('ID', 'TTGTAA', '765432'), '-', 1)),
+            (sequences.Fastq('ID', 'AATTACAAA', '123456789'), (sequences.Fastq('ID', 'TTGTAA', '876543'), '-', 1)),
+            (sequences.Fastq('ID', 'AAATTACAAA', '123456789A'), None),
+            (sequences.Fastq('ID', 'TTACAAAA', '12345678'), (sequences.Fastq('ID', 'TTGTAA', '654321'), '-', 2)),
+            (sequences.Fastq('ID', 'ATTACAAAA', '123456789'), (sequences.Fastq('ID', 'TTGTAA', '765432'), '-', 2)),
+            (sequences.Fastq('ID', 'AATTACAAAA', '123456789A'), (sequences.Fastq('ID', 'TTGTAA', '876543'), '-', 2)),
+            (sequences.Fastq('ID', 'AAATTACAAAA', '123456789AB'), None),
+        ]
 
+        for seq, expected in tests:
+            self.assertEqual(seq.make_into_gene(), expected)
 
     def test_is_all_Ns(self):
         '''Test is_all_Ns()'''
diff --git a/setup.py b/setup.py
index f20b8a1..50533b3 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@ from setuptools import setup, find_packages
 
 setup(
     name='pyfastaq',
-    version='3.11.1',
+    version='3.12.0',
     description='Script to manipulate FASTA and FASTQ files, plus API for developers',
     packages = find_packages(),
     author='Martin Hunt',

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastaq.git



More information about the debian-med-commit mailing list