[med-svn] [fastaq] 02/04: Imported Upstream version 3.12.0
Sascha Steinbiss
sascha at steinbiss.name
Thu Apr 14 16:35:27 UTC 2016
This is an automated email from the git hooks/post-receive script.
sascha-guest pushed a commit to branch master
in repository fastaq.
commit 0bd3dc285daaa6028ad33363f33b35c69aa64e9f
Author: Sascha Steinbiss <sascha at steinbiss.name>
Date: Thu Apr 14 16:29:17 2016 +0000
Imported Upstream version 3.12.0
---
pyfastaq/common.py | 2 +-
pyfastaq/sequences.py | 34 +++++++++++++++++
pyfastaq/tests/sequences_test.py | 82 ++++++++++++++++++++++++++++++++++++++++
setup.py | 2 +-
4 files changed, 118 insertions(+), 2 deletions(-)
diff --git a/pyfastaq/common.py b/pyfastaq/common.py
index e27854a..1c5007f 100644
--- a/pyfastaq/common.py
+++ b/pyfastaq/common.py
@@ -1 +1 @@
-version = '3.11.1'
+version = '3.12.0'
diff --git a/pyfastaq/sequences.py b/pyfastaq/sequences.py
index f757719..55dc7e5 100644
--- a/pyfastaq/sequences.py
+++ b/pyfastaq/sequences.py
@@ -1,3 +1,4 @@
+import copy
import re
import string
import random
@@ -350,6 +351,25 @@ class Fasta:
and self.seq[0:3].upper() in genetic_codes.starts[genetic_code]
+    def make_into_gene(self):
+        '''Tries to make this sequence into a gene sequence.
+
+        Tries the reverse strand first, then the forward strand, and for each
+        strand the reading frames 0, 1 and 2. Each candidate is a copy of self
+        (reverse complemented for the reverse strand) with the first `frame`
+        bases removed and any trailing bases that do not complete a codon
+        trimmed off. A candidate is accepted when its first three bases are a
+        start codon of the current genetic code, its translation is at least
+        two characters long and ends with '*', and there is no other '*' in
+        the translation.
+
+        Returns a tuple (new sequence, strand, frame) for the first candidate
+        accepted, where strand is '-' or '+' and frame is 0, 1 or 2.
+        Returns None if no candidate is accepted.'''
+        for reverse in (True, False):
+            for frame in range(3):
+                new_seq = copy.copy(self)
+                if reverse:
+                    new_seq.revcomp()
+                new_seq.seq = new_seq[frame:]
+
+                # Drop trailing bases that do not make up a whole codon
+                leftover = len(new_seq) % 3
+                if leftover:
+                    new_seq.seq = new_seq.seq[:-leftover]
+
+                new_aa_seq = new_seq.translate()
+
+                # The length check comes first so that new_aa_seq[-1] is never
+                # evaluated on an empty translation. The start codon is upper
+                # cased before the lookup, the same way looks_like_gene() does
+                # it, so that lower case sequences are handled as well.
+                if (len(new_aa_seq) >= 2
+                        and new_seq.seq[0:3].upper() in genetic_codes.starts[genetic_code]
+                        and new_aa_seq[-1] == '*'
+                        and '*' not in new_aa_seq[:-1]):
+                    strand = '-' if reverse else '+'
+                    return new_seq, strand, frame
+
+        return None
+
+
# Fills the object with the next sequence in the file. Returns
# True if this was successful, False if no more sequences in the file.
# If reading a file of quality scores, set read_quals = True
@@ -594,6 +614,20 @@ class Fastq(Fasta):
fa = super().translate()
return Fastq(fa.id, fa.seq, 'I'*len(fa.seq))
+    def make_into_gene(self):
+        '''Fastq version of make_into_gene.
+
+        Uses the parent class to find the strand, frame and gene length, then
+        cuts the same window out of both the bases and the quality string of a
+        copy of self. Returns a tuple (new sequence, strand, frame), or None
+        if the parent class did not find a gene.'''
+        result = super().make_into_gene()
+        if result is None:
+            return None
+
+        gene, strand, frame = result
+        trimmed = copy.copy(self)
+        if strand == '-':
+            trimmed.revcomp()
+
+        # One slice for both strings keeps bases and qualities in step
+        window = slice(frame, frame + len(gene))
+        trimmed.seq = trimmed.seq[window]
+        trimmed.qual = trimmed.qual[window]
+        return trimmed, strand, frame
+
def _orfs_from_aa_seq(seq):
orfs = []
diff --git a/pyfastaq/tests/sequences_test.py b/pyfastaq/tests/sequences_test.py
index 8663422..2cc2396 100644
--- a/pyfastaq/tests/sequences_test.py
+++ b/pyfastaq/tests/sequences_test.py
@@ -265,7 +265,89 @@ class TestFasta(unittest.TestCase):
self.assertFalse(sequences.Fasta('ID', 'ATTCAGTAA').looks_like_gene())
sequences.genetic_code = 11
self.assertTrue(sequences.Fasta('ID', 'ATTCAGTAA').looks_like_gene())
+ sequences.genetic_code = 1
+
+
+    def test_make_into_gene_fasta(self):
+        '''Test make_into_gene fasta'''
+        print('sequences.genetic_code', sequences.genetic_code)
+        # Each case is (input bases, outcome). The outcome is None when no
+        # gene is expected, otherwise (expected gene bases, strand, frame).
+        cases = [
+            ('T', None),
+            ('TT', None),
+            ('TTT', None),
+            ('TTG', None),
+            ('TAA', None),
+            ('TTGAAATAA', ('TTGAAATAA', '+', 0)),
+            ('TTGAAATAT', None),
+            ('TTGTAA', ('TTGTAA', '+', 0)),
+            ('TTGTAAA', ('TTGTAA', '+', 0)),
+            ('TTGTAAAA', ('TTGTAA', '+', 0)),
+            ('TTGTAAAAA', None),
+            ('ATTGTAA', ('TTGTAA', '+', 1)),
+            ('ATTGTAAA', ('TTGTAA', '+', 1)),
+            ('ATTGTAAAA', ('TTGTAA', '+', 1)),
+            ('ATTGTAAAAA', None),
+            ('AATTGTAA', ('TTGTAA', '+', 2)),
+            ('AATTGTAAA', ('TTGTAA', '+', 2)),
+            ('AATTGTAAAA', ('TTGTAA', '+', 2)),
+            ('AATTGTAAAAA', None),
+            ('TTACAA', ('TTGTAA', '-', 0)),
+            ('ATTACAA', ('TTGTAA', '-', 0)),
+            ('AATTACAA', ('TTGTAA', '-', 0)),
+            ('AAATTACAA', None),
+            ('TTACAAA', ('TTGTAA', '-', 1)),
+            ('ATTACAAA', ('TTGTAA', '-', 1)),
+            ('AATTACAAA', ('TTGTAA', '-', 1)),
+            ('AAATTACAAA', None),
+            ('TTACAAAA', ('TTGTAA', '-', 2)),
+            ('ATTACAAAA', ('TTGTAA', '-', 2)),
+            ('AATTACAAAA', ('TTGTAA', '-', 2)),
+            ('AAATTACAAAA', None),
+        ]
+
+        for bases, outcome in cases:
+            if outcome is None:
+                expected = None
+            else:
+                gene, strand, frame = outcome
+                expected = (sequences.Fasta('ID', gene), strand, frame)
+            self.assertEqual(sequences.Fasta('ID', bases).make_into_gene(), expected)
+
+
+    def test_make_into_gene_fastq(self):
+        '''Test make_into_gene fastq'''
+        print('sequences.genetic_code', sequences.genetic_code)
+        # Each case is (input bases, input qualities, outcome). The outcome is
+        # None when no gene is expected, otherwise
+        # (expected gene bases, expected qualities, strand, frame).
+        cases = [
+            ('T', '1', None),
+            ('TT', '12', None),
+            ('TTT', '123', None),
+            ('TTG', '123', None),
+            ('TAA', '123', None),
+            ('TTGAAATAA', '123456789', ('TTGAAATAA', '123456789', '+', 0)),
+            ('TTGAAATAT', '123456789', None),
+            ('TTGTAA', '123456', ('TTGTAA', '123456', '+', 0)),
+            ('TTGTAAA', '1234567', ('TTGTAA', '123456', '+', 0)),
+            ('TTGTAAAA', '12345678', ('TTGTAA', '123456', '+', 0)),
+            ('TTGTAAAAA', '123456789', None),
+            ('ATTGTAA', '1234567', ('TTGTAA', '234567', '+', 1)),
+            ('ATTGTAAA', '12345678', ('TTGTAA', '234567', '+', 1)),
+            ('ATTGTAAAA', '123456789', ('TTGTAA', '234567', '+', 1)),
+            ('ATTGTAAAAA', '123456789A', None),
+            ('AATTGTAA', '12345678', ('TTGTAA', '345678', '+', 2)),
+            ('AATTGTAAA', '123456789', ('TTGTAA', '345678', '+', 2)),
+            ('AATTGTAAAA', '123456789A', ('TTGTAA', '345678', '+', 2)),
+            ('AATTGTAAAAA', '123456789AB', None),
+            ('TTACAA', '123456', ('TTGTAA', '654321', '-', 0)),
+            ('ATTACAA', '1234567', ('TTGTAA', '765432', '-', 0)),
+            ('AATTACAA', '12345678', ('TTGTAA', '876543', '-', 0)),
+            ('AAATTACAA', '123456789', None),
+            ('TTACAAA', '1234567', ('TTGTAA', '654321', '-', 1)),
+            ('ATTACAAA', '12345678', ('TTGTAA', '765432', '-', 1)),
+            ('AATTACAAA', '123456789', ('TTGTAA', '876543', '-', 1)),
+            ('AAATTACAAA', '123456789A', None),
+            ('TTACAAAA', '12345678', ('TTGTAA', '654321', '-', 2)),
+            ('ATTACAAAA', '123456789', ('TTGTAA', '765432', '-', 2)),
+            ('AATTACAAAA', '123456789A', ('TTGTAA', '876543', '-', 2)),
+            ('AAATTACAAAA', '123456789AB', None),
+        ]
+
+        for bases, quals, outcome in cases:
+            if outcome is None:
+                expected = None
+            else:
+                gene, gene_quals, strand, frame = outcome
+                expected = (sequences.Fastq('ID', gene, gene_quals), strand, frame)
+            self.assertEqual(sequences.Fastq('ID', bases, quals).make_into_gene(), expected)
def test_is_all_Ns(self):
'''Test is_all_Ns()'''
diff --git a/setup.py b/setup.py
index f20b8a1..50533b3 100644
--- a/setup.py
+++ b/setup.py
@@ -4,7 +4,7 @@ from setuptools import setup, find_packages
setup(
name='pyfastaq',
- version='3.11.1',
+ version='3.12.0',
description='Script to manipulate FASTA and FASTQ files, plus API for developers',
packages = find_packages(),
author='Martin Hunt',
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-med/fastaq.git
More information about the debian-med-commit
mailing list